Changes of Revision 14 of package clang (project Mega:23.09)
_service:tar_scm:clang.spec (Changed)
@@ -1,5 +1,6 @@
 %bcond_without sys_llvm
 %bcond_without check
+%bcond_with classic_flang

 %global maj_ver 17
 %global min_ver 0
@@ -36,7 +37,7 @@
 Name:      %{pkg_name}
 Version:   %{clang_version}
-Release:   10
+Release:   15
 Summary:   A C language family front-end for LLVM

 License:   NCSA
@@ -52,6 +53,11 @@
 Patch5:    0005-backport-Disable-InterpreterExceptionTest-on-RISC-V.patch
 Patch6:    0006-clang-LoongArch-Add-loongarch64-to-os-triple.patch
 Patch7:    0007-add-more-warning-options-to-fgcc-compatible.patch
+Patch8:    0008-Backport-LoongArch-Add-the-support-for-vector.patch
+Patch9:    0009-Backport-LoongArch-improve-the-support-for-compiler-rt-and-bugfix.patch
+Patch10:   0010-Backport-clang-Avoid-Wshadow-warning-when-init-capture-named.patch
+Patch11:   0011-Add-the-support-for-classic-flang.patch
+Patch12:   0012-Fix-declaration-definition-mismatch-for-classic-flang.patch

 # Patches for clang-tools-extra
 # See https://reviews.llvm.org/D120301
@@ -256,6 +262,9 @@
 %else
     -DLLVM_LIBDIR_SUFFIX= \
 %endif
+%if %{with classic_flang}
+    -DLLVM_ENABLE_CLASSIC_FLANG=ON \
+%endif
     -DBUILD_FOR_OPENEULER=ON \
     -DCLANG_DEFAULT_UNWINDLIB=libgcc
@@ -293,6 +302,9 @@
 %{install_bindir}/clang-cl
 %{install_bindir}/clang-cpp
 %{install_prefix}/share/man/man1/*
+%if %{with classic_flang}
+%{install_bindir}/flang
+%endif

 %files libs
 %{install_libdir}/*.so.*
@@ -380,6 +392,21 @@
 %{install_bindir}/git-clang-format

 %changelog
+* Wed Apr 17 2024 luofeng <luofeng13@huawei.com> -17.0.6-15
+- Add the support for classic flang
+
+* Tue Apr 16 2024 wangqiang <wangqiang1@kylinos.cn> -17.0.6-14
+- Avoid -Wshadow warning when init-capture named same as class field.
+
+* Sat Apr 13 2024 liyunfei <liyunfei33@huawei.com> -17.0.6-13
+- Add more warning options to BUILD_FOR_OPENEULER gcc compatible part 3.
+
+* Tue Apr 09 2024 zhanglimin <zhanglimin@loongson.cn> - 17.0.6-12
+- Improve the support for compiler-rt and fix some bugs on LoongArch.
+
+* Fri Mar 29 2024 zhanglimin <zhanglimin@loongson.cn> -17.0.6-11
+- Add the support for vector on LoongArch.
+
 * Thu Mar 28 2024 liyunfei <liyunfei33@huawei.com> -17.0.6-10
 - Add more warning options to BUILD_FOR_OPENEULER gcc compatible part 2.
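Note on the new conditional: %bcond_with classic_flang leaves classic-Flang support switched off in a default build. Under the usual RPM bcond semantics (my reading; the diff itself does not spell this out), rebuilding with rpmbuild --with classic_flang turns %{with classic_flang} on, which adds -DLLVM_ENABLE_CLASSIC_FLANG=ON to the CMake invocation and packages %{install_bindir}/flang, as the hunks above show.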
_service:tar_scm:0007-add-more-warning-options-to-fgcc-compatible.patch (Changed)
@@ -5,13 +5,13 @@
 ---
  clang/lib/Driver/ToolChains/Clang.cpp | 20 ++++++++++++++++++++
- 1 file changed, 20 insertions(+)
+ 1 file changed, 25 insertions(+)

 diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
 index 793af55a1e5f..f0da323d8adb 100644
 --- a/clang/lib/Driver/ToolChains/Clang.cpp
 +++ b/clang/lib/Driver/ToolChains/Clang.cpp
-@@ -4683,11 +4683,37 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
+@@ -4683,11 +4683,42 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
  #ifdef BUILD_FOR_OPENEULER
    if (Args.hasFlag(options::OPT_fgcc_compatible,
                     options::OPT_fno_gcc_compatible, false)) {
@@ -41,11 +41,16 @@
 +      CmdArgs.push_back("-Wno-error=implicit-int");
 +      CmdArgs.push_back("-Wno-error=enum-constexpr-conversion");
 +      CmdArgs.push_back("-Wno-error=return-type");
++      CmdArgs.push_back("-Wno-error=reserved-user-defined-literal");
 +    }
 +    //other warnings
 +    CmdArgs.push_back("-Wno-error=cast-align");
 +    CmdArgs.push_back("-Wno-error=enum-conversion");
 +    CmdArgs.push_back("-Wno-error=switch");
++    CmdArgs.push_back("-Wno-error=cast-qual");
++    CmdArgs.push_back("-Wno-error=varargs");
++    CmdArgs.push_back("-Wno-error=unused-value");
++    CmdArgs.push_back("-Wno-error=format-nonliteral");
 +  }
  #endif
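As a rough illustration of what the newly added options cover, here is a hypothetical C file (my own sketch, not taken from the patch). As I read the change, with the BUILD_FOR_OPENEULER driver and -fgcc-compatible these diagnostics are forwarded as -Wno-error=..., so they stay warnings even in builds that otherwise promote warnings to errors (for example with -Werror):

/* legacy.c - hypothetical example, not part of the patch */
#include <stdio.h>

static void log_msg(const char *fmt) {
    char buf[64];
    snprintf(buf, sizeof buf, fmt);   /* non-literal format string: -Wformat-nonliteral */
    puts(buf);
}

static int strip_const(const int *p) {
    int *q = (int *)p;                /* casting away const: -Wcast-qual */
    *q + 1;                           /* expression result discarded: -Wunused-value */
    return *q;
}

int main(void) {
    int v = 41;
    log_msg("hello");
    return strip_const(&v) - 41;
}

The enum-constexpr-conversion, reserved-user-defined-literal and varargs entries cover analogous C++ and va_start cases; the mechanism is the same -Wno-error= downgrade.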
_service:tar_scm:0008-Backport-LoongArch-Add-the-support-for-vector.patch (Added)
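In the added series whose body follows, patch 1/8 reworks the LoongArch builtin definitions and feature checks, patch 2/8 adds the -mlsx/-mlasx driver options and the __loongarch_sx/__loongarch_asx predefined macros, patch 3/8 adds ABI handling that passes 128-bit and 256-bit vectors in vector registers, and patch 4/8 adds the LSX builtins and the lsxintrin.h header. A minimal usage sketch under those assumptions (my own example with an assumed file name and build line, not contained in the patch):

/* lsx_add.c - illustrative only; assumed build: clang --target=loongarch64 -mlsx -O2 -c lsx_add.c */
typedef int v4i32 __attribute__((vector_size(16), aligned(16)));

v4i32 add4(v4i32 a, v4i32 b) {
#if defined(__loongarch_sx)              /* defined when compiling with -mlsx */
    return __builtin_lsx_vadd_w(a, b);   /* LSX builtin declared in BuiltinsLoongArchLSX.def */
#else
    v4i32 r;                             /* scalar fallback when LSX is not enabled */
    for (int i = 0; i < 4; ++i)
        r[i] = a[i] + b[i];
    return r;
#endif
}

Without -mlsx the fallback branch is compiled instead, so the same source stays buildable on non-LSX configurations.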
@@ -0,0 +1,67252 @@ +From 6ff32ae0ca7a400249535b19d9ca489b44deae19 Mon Sep 17 00:00:00 2001 +From: wanglei <wanglei@loongson.cn> +Date: Wed, 9 Aug 2023 16:01:37 +0800 +Subject: PATCH 1/8 ClangLoongArch Use the ClangBuiltin class to + automatically generate support for CBE and CFE + +Fixed the type modifier (L->W), removed redundant feature checking code +since the feature has already been checked in `EmitBuiltinExpr`. And +Cleaned up unused diagnostic information. + +Reviewed By: SixWeining + +Differential Revision: https://reviews.llvm.org/D156866 + +(cherry picked from commit ea8d3b1f9f2d7385d97fcd34d14db0eb2cb2795c) +--- + .../include/clang/Basic/BuiltinsLoongArch.def | 25 ++-- + .../clang/Basic/DiagnosticSemaKinds.td | 7 - + clang/lib/CodeGen/CGBuiltin.cpp | 130 ------------------ + clang/lib/CodeGen/CodeGenFunction.h | 1 - + clang/lib/Sema/SemaChecking.cpp | 50 +------ + .../CodeGen/LoongArch/intrinsic-la32-error.c | 118 +++++++--------- + 6 files changed, 64 insertions(+), 267 deletions(-) + +diff --git a/clang/include/clang/Basic/BuiltinsLoongArch.def b/clang/include/clang/Basic/BuiltinsLoongArch.def +index 7f2c8403410d..20510e18fe58 100644 +--- a/clang/include/clang/Basic/BuiltinsLoongArch.def ++++ b/clang/include/clang/Basic/BuiltinsLoongArch.def +@@ -16,8 +16,7 @@ + #endif + + // TODO: Support more builtins. +-// TODO: Added feature constraints. +-TARGET_BUILTIN(__builtin_loongarch_cacop_d, "vLiULiLi", "nc", "64bit") ++TARGET_BUILTIN(__builtin_loongarch_cacop_d, "vWiUWiWi", "nc", "64bit") + TARGET_BUILTIN(__builtin_loongarch_cacop_w, "viUii", "nc", "32bit") + TARGET_BUILTIN(__builtin_loongarch_dbar, "vIUi", "nc", "") + TARGET_BUILTIN(__builtin_loongarch_ibar, "vIUi", "nc", "") +@@ -26,36 +25,36 @@ TARGET_BUILTIN(__builtin_loongarch_movgr2fcsr, "vIUiUi", "nc", "f") + TARGET_BUILTIN(__builtin_loongarch_break, "vIUi", "nc", "") + TARGET_BUILTIN(__builtin_loongarch_syscall, "vIUi", "nc", "") + TARGET_BUILTIN(__builtin_loongarch_cpucfg, "UiUi", "nc", "") +-TARGET_BUILTIN(__builtin_loongarch_asrtle_d, "vLiLi", "nc", "64bit") +-TARGET_BUILTIN(__builtin_loongarch_asrtgt_d, "vLiLi", "nc", "64bit") ++TARGET_BUILTIN(__builtin_loongarch_asrtle_d, "vWiWi", "nc", "64bit") ++TARGET_BUILTIN(__builtin_loongarch_asrtgt_d, "vWiWi", "nc", "64bit") + + TARGET_BUILTIN(__builtin_loongarch_crc_w_b_w, "iii", "nc", "64bit") + TARGET_BUILTIN(__builtin_loongarch_crc_w_h_w, "iii", "nc", "64bit") + TARGET_BUILTIN(__builtin_loongarch_crc_w_w_w, "iii", "nc", "64bit") +-TARGET_BUILTIN(__builtin_loongarch_crc_w_d_w, "iLii", "nc", "64bit") ++TARGET_BUILTIN(__builtin_loongarch_crc_w_d_w, "iWii", "nc", "64bit") + TARGET_BUILTIN(__builtin_loongarch_crcc_w_b_w, "iii", "nc", "64bit") + TARGET_BUILTIN(__builtin_loongarch_crcc_w_h_w, "iii", "nc", "64bit") + TARGET_BUILTIN(__builtin_loongarch_crcc_w_w_w, "iii", "nc", "64bit") +-TARGET_BUILTIN(__builtin_loongarch_crcc_w_d_w, "iLii", "nc", "64bit") ++TARGET_BUILTIN(__builtin_loongarch_crcc_w_d_w, "iWii", "nc", "64bit") + + TARGET_BUILTIN(__builtin_loongarch_csrrd_w, "UiIUi", "nc", "") +-TARGET_BUILTIN(__builtin_loongarch_csrrd_d, "ULiIUi", "nc", "64bit") ++TARGET_BUILTIN(__builtin_loongarch_csrrd_d, "UWiIUi", "nc", "64bit") + TARGET_BUILTIN(__builtin_loongarch_csrwr_w, "UiUiIUi", "nc", "") +-TARGET_BUILTIN(__builtin_loongarch_csrwr_d, "ULiULiIUi", "nc", "64bit") ++TARGET_BUILTIN(__builtin_loongarch_csrwr_d, "UWiUWiIUi", "nc", "64bit") + TARGET_BUILTIN(__builtin_loongarch_csrxchg_w, "UiUiUiIUi", "nc", "") +-TARGET_BUILTIN(__builtin_loongarch_csrxchg_d, "ULiULiULiIUi", "nc", 
"64bit") ++TARGET_BUILTIN(__builtin_loongarch_csrxchg_d, "UWiUWiUWiIUi", "nc", "64bit") + + TARGET_BUILTIN(__builtin_loongarch_iocsrrd_b, "UiUi", "nc", "") + TARGET_BUILTIN(__builtin_loongarch_iocsrrd_h, "UiUi", "nc", "") + TARGET_BUILTIN(__builtin_loongarch_iocsrrd_w, "UiUi", "nc", "") +-TARGET_BUILTIN(__builtin_loongarch_iocsrrd_d, "ULiUi", "nc", "64bit") ++TARGET_BUILTIN(__builtin_loongarch_iocsrrd_d, "UWiUi", "nc", "64bit") + TARGET_BUILTIN(__builtin_loongarch_iocsrwr_b, "vUiUi", "nc", "") + TARGET_BUILTIN(__builtin_loongarch_iocsrwr_h, "vUiUi", "nc", "") + TARGET_BUILTIN(__builtin_loongarch_iocsrwr_w, "vUiUi", "nc", "") +-TARGET_BUILTIN(__builtin_loongarch_iocsrwr_d, "vULiUi", "nc", "64bit") ++TARGET_BUILTIN(__builtin_loongarch_iocsrwr_d, "vUWiUi", "nc", "64bit") + +-TARGET_BUILTIN(__builtin_loongarch_lddir_d, "LiLiIULi", "nc", "64bit") +-TARGET_BUILTIN(__builtin_loongarch_ldpte_d, "vLiIULi", "nc", "64bit") ++TARGET_BUILTIN(__builtin_loongarch_lddir_d, "WiWiIUWi", "nc", "64bit") ++TARGET_BUILTIN(__builtin_loongarch_ldpte_d, "vWiIUWi", "nc", "64bit") + + #undef BUILTIN + #undef TARGET_BUILTIN +diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td +index c88f25209fc0..0e97620945af 100644 +--- a/clang/include/clang/Basic/DiagnosticSemaKinds.td ++++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td +@@ -11853,10 +11853,6 @@ def err_non_designated_init_used : Error< + def err_cast_from_randomized_struct : Error< + "casting from randomized structure pointer type %0 to %1">; + +-// LoongArch-specific Diagnostics +-def err_loongarch_builtin_requires_la64 : Error< +- "this builtin requires target: loongarch64">; +- + // Unsafe buffer usage diagnostics. + def warn_unsafe_buffer_variable : Warning< + "%0 is an %select{unsafe pointer used for buffer access|unsafe buffer that " +@@ -11872,9 +11868,6 @@ def note_unsafe_buffer_variable_fixit_group : Note< + "change type of %0 to '%select{std::span|std::array|std::span::iterator}1' to preserve bounds information%select{|, and change %2 to '%select{std::span|std::array|std::span::iterator}1' to propagate bounds information between them}3">; + def note_safe_buffer_usage_suggestions_disabled : Note< + "pass -fsafe-buffer-usage-suggestions to receive code hardening suggestions">; +-def err_loongarch_builtin_requires_la32 : Error< +- "this builtin requires target: loongarch32">; +- + def err_builtin_pass_in_regs_non_class : Error< + "argument %0 is not an unqualified class type">; + +diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp +index 30f5f4e7061c..e512762fafaf 100644 +--- a/clang/lib/CodeGen/CGBuiltin.cpp ++++ b/clang/lib/CodeGen/CGBuiltin.cpp +@@ -43,7 +43,6 @@ + #include "llvm/IR/IntrinsicsARM.h" + #include "llvm/IR/IntrinsicsBPF.h" + #include "llvm/IR/IntrinsicsHexagon.h" +-#include "llvm/IR/IntrinsicsLoongArch.h" + #include "llvm/IR/IntrinsicsNVPTX.h" + #include "llvm/IR/IntrinsicsPowerPC.h" + #include "llvm/IR/IntrinsicsR600.h" +@@ -5588,9 +5587,6 @@ static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF, + case llvm::Triple::riscv32: + case llvm::Triple::riscv64: + return CGF->EmitRISCVBuiltinExpr(BuiltinID, E, ReturnValue); +- case llvm::Triple::loongarch32: +- case llvm::Triple::loongarch64: +- return CGF->EmitLoongArchBuiltinExpr(BuiltinID, E); + default: + return nullptr; + } +@@ -20418,129 +20414,3 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID, + llvm::Function *F = CGM.getIntrinsic(ID, IntrinsicTypes); + return 
Builder.CreateCall(F, Ops, ""); + } +- +-Value *CodeGenFunction::EmitLoongArchBuiltinExpr(unsigned BuiltinID, +- const CallExpr *E) { +- SmallVector<Value *, 4> Ops; +- +- for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) +- Ops.push_back(EmitScalarExpr(E->getArg(i))); +- +- Intrinsic::ID ID = Intrinsic::not_intrinsic; +- +- switch (BuiltinID) { +- default: +- llvm_unreachable("unexpected builtin ID."); +- case LoongArch::BI__builtin_loongarch_cacop_d: +- ID = Intrinsic::loongarch_cacop_d; +- break; +- case LoongArch::BI__builtin_loongarch_cacop_w: +- ID = Intrinsic::loongarch_cacop_w; +- break; +- case LoongArch::BI__builtin_loongarch_dbar: +- ID = Intrinsic::loongarch_dbar; +- break; +- case LoongArch::BI__builtin_loongarch_break: +- ID = Intrinsic::loongarch_break; +- break; +- case LoongArch::BI__builtin_loongarch_ibar: +- ID = Intrinsic::loongarch_ibar; +- break; +- case LoongArch::BI__builtin_loongarch_movfcsr2gr: +- ID = Intrinsic::loongarch_movfcsr2gr; +- break; +- case LoongArch::BI__builtin_loongarch_movgr2fcsr: +- ID = Intrinsic::loongarch_movgr2fcsr; +- break; +- case LoongArch::BI__builtin_loongarch_syscall: +- ID = Intrinsic::loongarch_syscall; +- break; +- case LoongArch::BI__builtin_loongarch_crc_w_b_w: +- ID = Intrinsic::loongarch_crc_w_b_w; +- break; +- case LoongArch::BI__builtin_loongarch_crc_w_h_w: +- ID = Intrinsic::loongarch_crc_w_h_w; +- break; +- case LoongArch::BI__builtin_loongarch_crc_w_w_w: +- ID = Intrinsic::loongarch_crc_w_w_w; +- break; +- case LoongArch::BI__builtin_loongarch_crc_w_d_w: +- ID = Intrinsic::loongarch_crc_w_d_w; +- break; +- case LoongArch::BI__builtin_loongarch_crcc_w_b_w: +- ID = Intrinsic::loongarch_crcc_w_b_w; +- break; +- case LoongArch::BI__builtin_loongarch_crcc_w_h_w: +- ID = Intrinsic::loongarch_crcc_w_h_w; +- break; +- case LoongArch::BI__builtin_loongarch_crcc_w_w_w: +- ID = Intrinsic::loongarch_crcc_w_w_w; +- break; +- case LoongArch::BI__builtin_loongarch_crcc_w_d_w: +- ID = Intrinsic::loongarch_crcc_w_d_w; +- break; +- case LoongArch::BI__builtin_loongarch_csrrd_w: +- ID = Intrinsic::loongarch_csrrd_w; +- break; +- case LoongArch::BI__builtin_loongarch_csrwr_w: +- ID = Intrinsic::loongarch_csrwr_w; +- break; +- case LoongArch::BI__builtin_loongarch_csrxchg_w: +- ID = Intrinsic::loongarch_csrxchg_w; +- break; +- case LoongArch::BI__builtin_loongarch_csrrd_d: +- ID = Intrinsic::loongarch_csrrd_d; +- break; +- case LoongArch::BI__builtin_loongarch_csrwr_d: +- ID = Intrinsic::loongarch_csrwr_d; +- break; +- case LoongArch::BI__builtin_loongarch_csrxchg_d: +- ID = Intrinsic::loongarch_csrxchg_d; +- break; +- case LoongArch::BI__builtin_loongarch_iocsrrd_b: +- ID = Intrinsic::loongarch_iocsrrd_b; +- break; +- case LoongArch::BI__builtin_loongarch_iocsrrd_h: +- ID = Intrinsic::loongarch_iocsrrd_h; +- break; +- case LoongArch::BI__builtin_loongarch_iocsrrd_w: +- ID = Intrinsic::loongarch_iocsrrd_w; +- break; +- case LoongArch::BI__builtin_loongarch_iocsrrd_d: +- ID = Intrinsic::loongarch_iocsrrd_d; +- break; +- case LoongArch::BI__builtin_loongarch_iocsrwr_b: +- ID = Intrinsic::loongarch_iocsrwr_b; +- break; +- case LoongArch::BI__builtin_loongarch_iocsrwr_h: +- ID = Intrinsic::loongarch_iocsrwr_h; +- break; +- case LoongArch::BI__builtin_loongarch_iocsrwr_w: +- ID = Intrinsic::loongarch_iocsrwr_w; +- break; +- case LoongArch::BI__builtin_loongarch_iocsrwr_d: +- ID = Intrinsic::loongarch_iocsrwr_d; +- break; +- case LoongArch::BI__builtin_loongarch_cpucfg: +- ID = Intrinsic::loongarch_cpucfg; +- break; +- case 
LoongArch::BI__builtin_loongarch_asrtle_d: +- ID = Intrinsic::loongarch_asrtle_d; +- break; +- case LoongArch::BI__builtin_loongarch_asrtgt_d: +- ID = Intrinsic::loongarch_asrtgt_d; +- break; +- case LoongArch::BI__builtin_loongarch_lddir_d: +- ID = Intrinsic::loongarch_lddir_d; +- break; +- case LoongArch::BI__builtin_loongarch_ldpte_d: +- ID = Intrinsic::loongarch_ldpte_d; +- break; +- // TODO: Support more Intrinsics. +- } +- +- assert(ID != Intrinsic::not_intrinsic); +- +- llvm::Function *F = CGM.getIntrinsic(ID); +- return Builder.CreateCall(F, Ops); +-} +diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h +index 8722fd4550e4..143e0707b942 100644 +--- a/clang/lib/CodeGen/CodeGenFunction.h ++++ b/clang/lib/CodeGen/CodeGenFunction.h +@@ -4316,7 +4316,6 @@ public: + llvm::Value *EmitHexagonBuiltinExpr(unsigned BuiltinID, const CallExpr *E); + llvm::Value *EmitRISCVBuiltinExpr(unsigned BuiltinID, const CallExpr *E, + ReturnValueSlot ReturnValue); +- llvm::Value *EmitLoongArchBuiltinExpr(unsigned BuiltinID, const CallExpr *E); + void ProcessOrderScopeAMDGCN(llvm::Value *Order, llvm::Value *Scope, + llvm::AtomicOrdering &AO, + llvm::SyncScope::ID &SSID); +diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp +index a94f009f3fa6..a8416bf4de92 100644 +--- a/clang/lib/Sema/SemaChecking.cpp ++++ b/clang/lib/Sema/SemaChecking.cpp +@@ -3827,39 +3827,12 @@ bool Sema::CheckLoongArchBuiltinFunctionCall(const TargetInfo &TI, + default: + break; + case LoongArch::BI__builtin_loongarch_cacop_d: +- if (!TI.hasFeature("64bit")) +- return Diag(TheCall->getBeginLoc(), +- diag::err_loongarch_builtin_requires_la64) +- << TheCall->getSourceRange(); +- fallthrough; + case LoongArch::BI__builtin_loongarch_cacop_w: { +- if (BuiltinID == LoongArch::BI__builtin_loongarch_cacop_w && +- !TI.hasFeature("32bit")) +- return Diag(TheCall->getBeginLoc(), +- diag::err_loongarch_builtin_requires_la32) +- << TheCall->getSourceRange(); + SemaBuiltinConstantArgRange(TheCall, 0, 0, llvm::maxUIntN(5)); + SemaBuiltinConstantArgRange(TheCall, 2, llvm::minIntN(12), + llvm::maxIntN(12)); + break; + } +- case LoongArch::BI__builtin_loongarch_crc_w_b_w: +- case LoongArch::BI__builtin_loongarch_crc_w_h_w: +- case LoongArch::BI__builtin_loongarch_crc_w_w_w: +- case LoongArch::BI__builtin_loongarch_crc_w_d_w: +- case LoongArch::BI__builtin_loongarch_crcc_w_b_w: +- case LoongArch::BI__builtin_loongarch_crcc_w_h_w: +- case LoongArch::BI__builtin_loongarch_crcc_w_w_w: +- case LoongArch::BI__builtin_loongarch_crcc_w_d_w: +- case LoongArch::BI__builtin_loongarch_iocsrrd_d: +- case LoongArch::BI__builtin_loongarch_iocsrwr_d: +- case LoongArch::BI__builtin_loongarch_asrtle_d: +- case LoongArch::BI__builtin_loongarch_asrtgt_d: +- if (!TI.hasFeature("64bit")) +- return Diag(TheCall->getBeginLoc(), +- diag::err_loongarch_builtin_requires_la64) +- << TheCall->getSourceRange(); +- break; + case LoongArch::BI__builtin_loongarch_break: + case LoongArch::BI__builtin_loongarch_dbar: + case LoongArch::BI__builtin_loongarch_ibar: +@@ -3867,35 +3840,16 @@ bool Sema::CheckLoongArchBuiltinFunctionCall(const TargetInfo &TI, + // Check if immediate is in 0, 32767. 
+ return SemaBuiltinConstantArgRange(TheCall, 0, 0, 32767); + case LoongArch::BI__builtin_loongarch_csrrd_w: +- return SemaBuiltinConstantArgRange(TheCall, 0, 0, 16383); +- case LoongArch::BI__builtin_loongarch_csrwr_w: +- return SemaBuiltinConstantArgRange(TheCall, 1, 0, 16383); +- case LoongArch::BI__builtin_loongarch_csrxchg_w: +- return SemaBuiltinConstantArgRange(TheCall, 2, 0, 16383); + case LoongArch::BI__builtin_loongarch_csrrd_d: +- if (!TI.hasFeature("64bit")) +- return Diag(TheCall->getBeginLoc(), +- diag::err_loongarch_builtin_requires_la64) +- << TheCall->getSourceRange(); + return SemaBuiltinConstantArgRange(TheCall, 0, 0, 16383); ++ case LoongArch::BI__builtin_loongarch_csrwr_w: + case LoongArch::BI__builtin_loongarch_csrwr_d: +- if (!TI.hasFeature("64bit")) +- return Diag(TheCall->getBeginLoc(), +- diag::err_loongarch_builtin_requires_la64) +- << TheCall->getSourceRange(); + return SemaBuiltinConstantArgRange(TheCall, 1, 0, 16383); ++ case LoongArch::BI__builtin_loongarch_csrxchg_w: + case LoongArch::BI__builtin_loongarch_csrxchg_d: +- if (!TI.hasFeature("64bit")) +- return Diag(TheCall->getBeginLoc(), +- diag::err_loongarch_builtin_requires_la64) +- << TheCall->getSourceRange(); + return SemaBuiltinConstantArgRange(TheCall, 2, 0, 16383); + case LoongArch::BI__builtin_loongarch_lddir_d: + case LoongArch::BI__builtin_loongarch_ldpte_d: +- if (!TI.hasFeature("64bit")) +- return Diag(TheCall->getBeginLoc(), +- diag::err_loongarch_builtin_requires_la64) +- << TheCall->getSourceRange(); + return SemaBuiltinConstantArgRange(TheCall, 1, 0, 31); + case LoongArch::BI__builtin_loongarch_movfcsr2gr: + case LoongArch::BI__builtin_loongarch_movgr2fcsr: +diff --git a/clang/test/CodeGen/LoongArch/intrinsic-la32-error.c b/clang/test/CodeGen/LoongArch/intrinsic-la32-error.c +index 0264c2948934..db113a13eb5a 100644 +--- a/clang/test/CodeGen/LoongArch/intrinsic-la32-error.c ++++ b/clang/test/CodeGen/LoongArch/intrinsic-la32-error.c +@@ -1,9 +1,58 @@ + // RUN: %clang_cc1 -triple loongarch32 -emit-llvm -S -verify %s -o /dev/null ++// RUN: not %clang_cc1 -triple loongarch32 -DFEATURE_CHECK -emit-llvm %s 2>&1 \ ++// RUN: | FileCheck %s + + #include <larchintrin.h> + ++#ifdef FEATURE_CHECK ++void test_feature(long *v_l, unsigned long *v_ul, int *v_i, unsigned ui, char c, short s) { ++// CHECK: error: '__builtin_loongarch_cacop_d' needs target feature 64bit ++ __builtin_loongarch_cacop_d(1, v_ul0, 1024); ++ ++// CHECK: error: '__builtin_loongarch_crc_w_b_w' needs target feature 64bit ++ v_i0 = __builtin_loongarch_crc_w_b_w(c, v_i0); ++// CHECK: error: '__builtin_loongarch_crc_w_h_w' needs target feature 64bit ++ v_i1 = __builtin_loongarch_crc_w_h_w(c, v_i0); ++// CHECK: error: '__builtin_loongarch_crc_w_w_w' needs target feature 64bit ++ v_i2 = __builtin_loongarch_crc_w_w_w(c, v_i0); ++// CHECK: error: '__builtin_loongarch_crc_w_d_w' needs target feature 64bit ++ v_i3 = __builtin_loongarch_crc_w_d_w(c, v_i0); ++ ++// CHECK: error: '__builtin_loongarch_crcc_w_b_w' needs target feature 64bit ++ v_i4 = __builtin_loongarch_crcc_w_b_w(c, v_i0); ++// CHECK: error: '__builtin_loongarch_crcc_w_h_w' needs target feature 64bit ++ v_i5 = __builtin_loongarch_crcc_w_h_w(s, v_i0); ++// CHECK: error: '__builtin_loongarch_crcc_w_w_w' needs target feature 64bit ++ v_i6 = __builtin_loongarch_crcc_w_w_w(v_i0, v_i1); ++// CHECK: error: '__builtin_loongarch_crcc_w_d_w' needs target feature 64bit ++ v_i7 = __builtin_loongarch_crcc_w_d_w(v_l0, v_i0); ++ ++// CHECK: error: '__builtin_loongarch_csrrd_d' needs target 
feature 64bit ++ v_ul0 = __builtin_loongarch_csrrd_d(1); ++// CHECK: error: '__builtin_loongarch_csrwr_d' needs target feature 64bit ++ v_ul1 = __builtin_loongarch_csrwr_d(v_ul0, 1); ++// CHECK: error: '__builtin_loongarch_csrxchg_d' needs target feature 64bit ++ v_ul2 = __builtin_loongarch_csrxchg_d(v_ul0, v_ul1, 1); ++ ++ ++// CHECK: error: '__builtin_loongarch_iocsrrd_d' needs target feature 64bit ++ v_ul3 = __builtin_loongarch_iocsrrd_d(ui); ++// CHECK: error: '__builtin_loongarch_iocsrwr_d' needs target feature 64bit ++ __builtin_loongarch_iocsrwr_d(v_ul0, ui); ++ ++// CHECK: error: '__builtin_loongarch_asrtle_d' needs target feature 64bit ++ __builtin_loongarch_asrtle_d(v_l0, v_l1); ++// CHECK: error: '__builtin_loongarch_asrtgt_d' needs target feature 64bit ++ __builtin_loongarch_asrtgt_d(v_l0, v_l1); ++ ++// CHECK: error: '__builtin_loongarch_lddir_d' needs target feature 64bit ++ v_ul4 = __builtin_loongarch_lddir_d(v_l0, 1); ++// CHECK: error: '__builtin_loongarch_ldpte_d' needs target feature 64bit ++ __builtin_loongarch_ldpte_d(v_l0, 1); ++} ++#endif ++ + void cacop_d(unsigned long int a) { +- __builtin_loongarch_cacop_d(1, a, 1024); // expected-error {{this builtin requires target: loongarch64}} + __builtin_loongarch_cacop_w(-1, a, 1024); // expected-error {{argument value -1 is outside the valid range 0, 31}} + __builtin_loongarch_cacop_w(32, a, 1024); // expected-error {{argument value 32 is outside the valid range 0, 31}} + __builtin_loongarch_cacop_w(1, a, -4096); // expected-error {{argument value -4096 is outside the valid range -2048, 2047}} +@@ -47,49 +96,6 @@ void syscall(int a) { + __builtin_loongarch_syscall(a); // expected-error {{argument to '__builtin_loongarch_syscall' must be a constant integer}} + } + +-int crc_w_b_w(char a, int b) { +- return __builtin_loongarch_crc_w_b_w(a, b); // expected-error {{this builtin requires target: loongarch64}} +-} +- +-int crc_w_h_w(short a, int b) { +- return __builtin_loongarch_crc_w_h_w(a, b); // expected-error {{this builtin requires target: loongarch64}} +-} +- +-int crc_w_w_w(int a, int b) { +- return __builtin_loongarch_crc_w_w_w(a, b); // expected-error {{this builtin requires target: loongarch64}} +-} +- +-int crc_w_d_w(long int a, int b) { +- return __builtin_loongarch_crc_w_d_w(a, b); // expected-error {{this builtin requires target: loongarch64}} +-} +-int crcc_w_b_w(char a, int b) { +- return __builtin_loongarch_crcc_w_b_w(a, b); // expected-error {{this builtin requires target: loongarch64}} +-} +- +-int crcc_w_h_w(short a, int b) { +- return __builtin_loongarch_crcc_w_h_w(a, b); // expected-error {{this builtin requires target: loongarch64}} +-} +- +-int crcc_w_w_w(int a, int b) { +- return __builtin_loongarch_crcc_w_w_w(a, b); // expected-error {{this builtin requires target: loongarch64}} +-} +- +-int crcc_w_d_w(long int a, int b) { +- return __builtin_loongarch_crcc_w_d_w(a, b); // expected-error {{this builtin requires target: loongarch64}} +-} +- +-unsigned long int csrrd_d() { +- return __builtin_loongarch_csrrd_d(1); // expected-error {{this builtin requires target: loongarch64}} +-} +- +-unsigned long int csrwr_d(unsigned long int a) { +- return __builtin_loongarch_csrwr_d(a, 1); // expected-error {{this builtin requires target: loongarch64}} +-} +- +-unsigned long int csrxchg_d(unsigned long int a, unsigned long int b) { +- return __builtin_loongarch_csrxchg_d(a, b, 1); // expected-error {{this builtin requires target: loongarch64}} +-} +- + void csrrd_w(int a) { + __builtin_loongarch_csrrd_w(16384); // 
expected-error {{argument value 16384 is outside the valid range 0, 16383}} + __builtin_loongarch_csrrd_w(-1); // expected-error {{argument value 4294967295 is outside the valid range 0, 16383}} +@@ -108,30 +114,6 @@ void csrxchg_w(unsigned int a, unsigned int b) { + __builtin_loongarch_csrxchg_w(a, b, b); // expected-error {{argument to '__builtin_loongarch_csrxchg_w' must be a constant integer}} + } + +-unsigned long int iocsrrd_d(unsigned int a) { +- return __builtin_loongarch_iocsrrd_d(a); // expected-error {{this builtin requires target: loongarch64}} +-} +- +-void iocsrwr_d(unsigned long int a, unsigned int b) { +- __builtin_loongarch_iocsrwr_d(a, b); // expected-error {{this builtin requires target: loongarch64}} +-} +- +-void asrtle_d(long int a, long int b) { +- __builtin_loongarch_asrtle_d(a, b); // expected-error {{this builtin requires target: loongarch64}} +-} +- +-void asrtgt_d(long int a, long int b) { +- __builtin_loongarch_asrtgt_d(a, b); // expected-error {{this builtin requires target: loongarch64}} +-} +- +-void lddir_d(long int a, int b) { +- __builtin_loongarch_lddir_d(a, 1); // expected-error {{this builtin requires target: loongarch64}} +-} +- +-void ldpte_d(long int a, int b) { +- __builtin_loongarch_ldpte_d(a, 1); // expected-error {{this builtin requires target: loongarch64}} +-} +- + void rdtime_d() { + __rdtime_d(); // expected-error {{call to undeclared function '__rdtime_d'}} + } +-- +2.20.1 + + +From ec68e541ae9c22826292d5db1bf2ce9e39b6a57b Mon Sep 17 00:00:00 2001 +From: licongtian <licongtian@loongson.cn> +Date: Wed, 20 Sep 2023 11:21:56 +0800 +Subject: PATCH 2/8 ClangLoongArch Support compiler options -mlsx/-mlasx + for clang + +This patch adds compiler options -mlsx/-mlasx which enables the +instruction sets of LSX and LASX, and sets related predefined macros +according to the options. 
+ +(cherry picked from commit 8d4e35600f3ba90997a59fdb9baeb196e723eec9) +--- + .../clang/Basic/DiagnosticDriverKinds.td | 6 +++ + clang/include/clang/Driver/Options.td | 10 +++++ + clang/lib/Basic/Targets/LoongArch.cpp | 12 +++++- + clang/lib/Basic/Targets/LoongArch.h | 4 ++ + .../lib/Driver/ToolChains/Arch/LoongArch.cpp | 32 +++++++++++++++ + clang/test/Driver/loongarch-mlasx-error.c | 15 +++++++ + clang/test/Driver/loongarch-mlasx.c | 37 +++++++++++++++++ + clang/test/Driver/loongarch-mlsx-error.c | 12 ++++++ + clang/test/Driver/loongarch-mlsx.c | 41 +++++++++++++++++++ + clang/test/Preprocessor/init-loongarch.c | 35 ++++++++++++++++ + 10 files changed, 203 insertions(+), 1 deletion(-) + create mode 100644 clang/test/Driver/loongarch-mlasx-error.c + create mode 100644 clang/test/Driver/loongarch-mlasx.c + create mode 100644 clang/test/Driver/loongarch-mlsx-error.c + create mode 100644 clang/test/Driver/loongarch-mlsx.c + +diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td +index 1b69324d073a..8c751f2c4bda 100644 +--- a/clang/include/clang/Basic/DiagnosticDriverKinds.td ++++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td +@@ -732,6 +732,12 @@ def warn_drv_loongarch_conflicting_implied_val : Warning< + InGroup<OptionIgnored>; + def err_drv_loongarch_invalid_mfpu_EQ : Error< + "invalid argument '%0' to -mfpu=; must be one of: 64, 32, none, 0 (alias for none)">; ++def err_drv_loongarch_wrong_fpu_width_for_lsx : Error< ++ "wrong fpu width; LSX depends on 64-bit FPU.">; ++def err_drv_loongarch_wrong_fpu_width_for_lasx : Error< ++ "wrong fpu width; LASX depends on 64-bit FPU.">; ++def err_drv_loongarch_invalid_simd_option_combination : Error< ++ "invalid option combination; LASX depends on LSX.">; + + def err_drv_expand_response_file : Error< + "failed to expand response file: %0">; +diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td +index d4f7315bf8cb..bb4928293c45 100644 +--- a/clang/include/clang/Driver/Options.td ++++ b/clang/include/clang/Driver/Options.td +@@ -192,6 +192,8 @@ def m_x86_Features_Group : OptionGroup<"<x86 features group>">, + Group<m_Group>, Flags<CoreOption>, DocName<"X86">; + def m_riscv_Features_Group : OptionGroup<"<riscv features group>">, + Group<m_Group>, DocName<"RISC-V">; ++def m_loongarch_Features_Group : OptionGroup<"<loongarch features group>">, ++ Group<m_Group>, DocName<"LoongArch">; + + def m_libc_Group : OptionGroup<"<m libc group>">, Group<m_mips_Features_Group>, + Flags<HelpHidden>; +@@ -4196,6 +4198,14 @@ def mstack_protector_guard_reg_EQ : Joined<"-", "mstack-protector-guard-reg="> + def mfentry : Flag<"-", "mfentry">, HelpText<"Insert calls to fentry at function entry (x86/SystemZ only)">, + Flags<CC1Option>, Group<m_Group>, + MarshallingInfoFlag<CodeGenOpts<"CallFEntry">>; ++def mlsx : Flag<"-", "mlsx">, Group<m_loongarch_Features_Group>, ++ HelpText<"Enable Loongson SIMD Extension (LSX).">; ++def mno_lsx : Flag<"-", "mno-lsx">, Group<m_loongarch_Features_Group>, ++ HelpText<"Disable Loongson SIMD Extension (LSX).">; ++def mlasx : Flag<"-", "mlasx">, Group<m_loongarch_Features_Group>, ++ HelpText<"Enable Loongson Advanced SIMD Extension (LASX).">; ++def mno_lasx : Flag<"-", "mno-lasx">, Group<m_loongarch_Features_Group>, ++ HelpText<"Disable Loongson Advanced SIMD Extension (LASX).">; + def mnop_mcount : Flag<"-", "mnop-mcount">, HelpText<"Generate mcount/__fentry__ calls as nops. 
To activate they need to be patched in.">, + Flags<CC1Option>, Group<m_Group>, + MarshallingInfoFlag<CodeGenOpts<"MNopMCount">>; +diff --git a/clang/lib/Basic/Targets/LoongArch.cpp b/clang/lib/Basic/Targets/LoongArch.cpp +index 4448a2ae10a1..88537989a051 100644 +--- a/clang/lib/Basic/Targets/LoongArch.cpp ++++ b/clang/lib/Basic/Targets/LoongArch.cpp +@@ -208,6 +208,11 @@ void LoongArchTargetInfo::getTargetDefines(const LangOptions &Opts, + TuneCPU = ArchName; + Builder.defineMacro("__loongarch_tune", Twine('"') + TuneCPU + Twine('"')); + ++ if (HasFeatureLSX) ++ Builder.defineMacro("__loongarch_sx", Twine(1)); ++ if (HasFeatureLASX) ++ Builder.defineMacro("__loongarch_asx", Twine(1)); ++ + StringRef ABI = getABI(); + if (ABI == "lp64d" || ABI == "lp64f" || ABI == "lp64s") + Builder.defineMacro("__loongarch_lp64"); +@@ -257,6 +262,8 @@ bool LoongArchTargetInfo::hasFeature(StringRef Feature) const { + .Case("loongarch64", Is64Bit) + .Case("32bit", !Is64Bit) + .Case("64bit", Is64Bit) ++ .Case("lsx", HasFeatureLSX) ++ .Case("lasx", HasFeatureLASX) + .Default(false); + } + +@@ -274,7 +281,10 @@ bool LoongArchTargetInfo::handleTargetFeatures( + if (Feature == "+d") { + HasFeatureD = true; + } +- } ++ } else if (Feature == "+lsx") ++ HasFeatureLSX = true; ++ else if (Feature == "+lasx") ++ HasFeatureLASX = true; + } + return true; + } +diff --git a/clang/lib/Basic/Targets/LoongArch.h b/clang/lib/Basic/Targets/LoongArch.h +index 34143f462a24..8f4150b2539d 100644 +--- a/clang/lib/Basic/Targets/LoongArch.h ++++ b/clang/lib/Basic/Targets/LoongArch.h +@@ -27,12 +27,16 @@ protected: + std::string CPU; + bool HasFeatureD; + bool HasFeatureF; ++ bool HasFeatureLSX; ++ bool HasFeatureLASX; + + public: + LoongArchTargetInfo(const llvm::Triple &Triple, const TargetOptions &) + : TargetInfo(Triple) { + HasFeatureD = false; + HasFeatureF = false; ++ HasFeatureLSX = false; ++ HasFeatureLASX = false; + LongDoubleWidth = 128; + LongDoubleAlign = 128; + LongDoubleFormat = &llvm::APFloat::IEEEquad(); +diff --git a/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp b/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp +index 65925e9ed610..31153a67ad28 100644 +--- a/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp ++++ b/clang/lib/Driver/ToolChains/Arch/LoongArch.cpp +@@ -175,6 +175,38 @@ void loongarch::getLoongArchTargetFeatures(const Driver &D, + A->ignoreTargetSpecific(); + if (Arg *A = Args.getLastArgNoClaim(options::OPT_mfpu_EQ)) + A->ignoreTargetSpecific(); ++ ++ // Select lsx feature determined by -mno-lsx. ++ if (const Arg *A = Args.getLastArg(options::OPT_mlsx, options::OPT_mno_lsx)) { ++ // LSX depends on 64-bit FPU. ++ // -m*-float and -mfpu=none/0/32 conflict with -mlsx. ++ if (A->getOption().matches(options::OPT_mlsx)) { ++ if (llvm::find(Features, "-d") != Features.end()) ++ D.Diag(diag::err_drv_loongarch_wrong_fpu_width_for_lsx); ++ else /*-mlsx*/ ++ Features.push_back("+lsx"); ++ } else /*-mno-lsx*/ { ++ Features.push_back("-lsx"); ++ } ++ } ++ ++ // Select lasx feature determined by -mno-lasx. ++ if (const Arg *A = ++ Args.getLastArg(options::OPT_mlasx, options::OPT_mno_lasx)) { ++ // LASX depends on 64-bit FPU and LSX. ++ // -mno-lsx conflicts with -mlasx. 
++ if (A->getOption().matches(options::OPT_mlasx)) { ++ if (llvm::find(Features, "-d") != Features.end()) ++ D.Diag(diag::err_drv_loongarch_wrong_fpu_width_for_lasx); ++ else if (llvm::find(Features, "-lsx") != Features.end()) ++ D.Diag(diag::err_drv_loongarch_invalid_simd_option_combination); ++ else { /*-mlasx*/ ++ Features.push_back("+lsx"); ++ Features.push_back("+lasx"); ++ } ++ } else /*-mno-lasx*/ ++ Features.push_back("-lasx"); ++ } + } + + std::string loongarch::postProcessTargetCPUString(const std::string &CPU, +diff --git a/clang/test/Driver/loongarch-mlasx-error.c b/clang/test/Driver/loongarch-mlasx-error.c +new file mode 100644 +index 000000000000..e66f277f7c29 +--- /dev/null ++++ b/clang/test/Driver/loongarch-mlasx-error.c +@@ -0,0 +1,15 @@ ++// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlasx -msingle-float 2>&1 \ ++// RUN: FileCheck --check-prefix=ERROR_LASX_FPU64 %s ++// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlasx -msoft-float 2>&1 \ ++// RUN: FileCheck --check-prefix=ERROR_LASX_FPU64 %s ++// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlasx -mfpu=32 2>&1 \ ++// RUN: FileCheck --check-prefix=ERROR_LASX_FPU64 %s ++// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlasx -mfpu=0 2>&1 \ ++// RUN: FileCheck --check-prefix=ERROR_LASX_FPU64 %s ++// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlasx -mfpu=none 2>&1 \ ++// RUN: FileCheck --check-prefix=ERROR_LASX_FPU64 %s ++// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlasx -mno-lsx 2>&1 \ ++// RUN: FileCheck --check-prefix=ERROR_LASX_FPU128 %s ++ ++// ERROR_LASX_FPU64: error: wrong fpu width; LASX depends on 64-bit FPU. ++// ERROR_LASX_FPU128: error: invalid option combination; LASX depends on LSX. +diff --git a/clang/test/Driver/loongarch-mlasx.c b/clang/test/Driver/loongarch-mlasx.c +new file mode 100644 +index 000000000000..0b934f125c9e +--- /dev/null ++++ b/clang/test/Driver/loongarch-mlasx.c +@@ -0,0 +1,37 @@ ++/// Test -mno-lasx options. 
++ ++// RUN: %clang --target=loongarch64 -mlasx -fsyntax-only %s -### 2>&1 | \ ++// RUN: FileCheck %s --check-prefix=CC1-LASX ++// RUN: %clang --target=loongarch64 -mno-lasx -fsyntax-only %s -### 2>&1 | \ ++// RUN: FileCheck %s --check-prefix=CC1-NOLASX ++// RUN: %clang --target=loongarch64 -mlasx -mno-lasx -fsyntax-only %s -### 2>&1 | \ ++// RUN: FileCheck %s --check-prefix=CC1-NOLASX ++// RUN: %clang --target=loongarch64 -mno-lasx -mlasx -fsyntax-only %s -### 2>&1 | \ ++// RUN: FileCheck %s --check-prefix=CC1-LASX ++// RUN: %clang --target=loongarch64 -mlsx -mlasx -fsyntax-only %s -### 2>&1 | \ ++// RUN: FileCheck %s --check-prefix=CC1-LASX ++// RUN: %clang --target=loongarch64 -mlasx -mlsx -fsyntax-only %s -### 2>&1 | \ ++// RUN: FileCheck %s --check-prefix=CC1-LASX ++ ++// RUN: %clang --target=loongarch64 -mlasx -S -emit-llvm %s -o - | \ ++// RUN: FileCheck %s --check-prefix=IR-LASX ++// RUN: %clang --target=loongarch64 -mno-lasx -S -emit-llvm %s -o - | \ ++// RUN: FileCheck %s --check-prefix=IR-NOLASX ++// RUN: %clang --target=loongarch64 -mlasx -mno-lasx -S -emit-llvm %s -o - | \ ++// RUN: FileCheck %s --check-prefix=IR-NOLASX ++// RUN: %clang --target=loongarch64 -mno-lasx -mlasx -S -emit-llvm %s -o - | \ ++// RUN: FileCheck %s --check-prefix=IR-LASX ++// RUN: %clang --target=loongarch64 -mlsx -mlasx -S -emit-llvm %s -o - | \ ++// RUN: FileCheck %s --check-prefix=IR-LASX ++// RUN: %clang --target=loongarch64 -mlasx -mlsx -S -emit-llvm %s -o - | \ ++// RUN: FileCheck %s --check-prefix=IR-LASX ++ ++// CC1-LASX: "-target-feature" "+lsx" "-target-feature" "+lasx" ++// CC1-NOLASX: "-target-feature" "-lasx" ++ ++// IR-LASX: attributes ## ={{.*}}"target-features"="{{(.*,)?}}+lasx{{(,.*)?}}" ++// IR-NOLASX: attributes ## ={{.*}}"target-features"="{{(.*,)?}}-lasx{{(,.*)?}}" ++ ++int foo(void){ ++ return 3; ++} +diff --git a/clang/test/Driver/loongarch-mlsx-error.c b/clang/test/Driver/loongarch-mlsx-error.c +new file mode 100644 +index 000000000000..bd6b8e2718bf +--- /dev/null ++++ b/clang/test/Driver/loongarch-mlsx-error.c +@@ -0,0 +1,12 @@ ++// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlsx -msingle-float 2>&1 \ ++// RUN: FileCheck --check-prefix=ERROR_LSX_FPU64 %s ++// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlsx -msoft-float 2>&1 \ ++// RUN: FileCheck --check-prefix=ERROR_LSX_FPU64 %s ++// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlsx -mfpu=32 2>&1 \ ++// RUN: FileCheck --check-prefix=ERROR_LSX_FPU64 %s ++// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlsx -mfpu=0 2>&1 \ ++// RUN: FileCheck --check-prefix=ERROR_LSX_FPU64 %s ++// RUN: not %clang --target=loongarch64 %s -fsyntax-only -mlsx -mfpu=none 2>&1 \ ++// RUN: FileCheck --check-prefix=ERROR_LSX_FPU64 %s ++ ++// ERROR_LSX_FPU64: error: wrong fpu width; LSX depends on 64-bit FPU. +diff --git a/clang/test/Driver/loongarch-mlsx.c b/clang/test/Driver/loongarch-mlsx.c +new file mode 100644 +index 000000000000..7d4307b078e1 +--- /dev/null ++++ b/clang/test/Driver/loongarch-mlsx.c +@@ -0,0 +1,41 @@ ++/// Test -mno-lsx options. 
++ ++// RUN: %clang --target=loongarch64 -mlsx -fsyntax-only %s -### 2>&1 | \ ++// RUN: FileCheck %s --check-prefix=CC1-LSX ++// RUN: %clang --target=loongarch64 -mno-lsx -fsyntax-only %s -### 2>&1 | \ ++// RUN: FileCheck %s --check-prefix=CC1-NOLSX ++// RUN: %clang --target=loongarch64 -mlsx -mno-lsx -fsyntax-only %s -### 2>&1 | \ ++// RUN: FileCheck %s --check-prefix=CC1-NOLSX ++// RUN: %clang --target=loongarch64 -mno-lsx -mlsx -fsyntax-only %s -### 2>&1 | \ ++// RUN: FileCheck %s --check-prefix=CC1-LSX ++// RUN: %clang --target=loongarch64 -mlsx -mno-lasx -fsyntax-only %s -### 2>&1 | \ ++// RUN: FileCheck %s --check-prefix=CC1-LSX ++// RUN: %clang --target=loongarch64 -mno-lasx -mlsx -fsyntax-only %s -### 2>&1 | \ ++// RUN: FileCheck %s --check-prefix=CC1-LSX ++// RUN: %clang --target=loongarch64 -mno-lsx -mno-lasx -fsyntax-only %s -### 2>&1 | \ ++// RUN: FileCheck %s --check-prefix=CC1-NOLSX ++ ++// RUN: %clang --target=loongarch64 -mlsx -S -emit-llvm %s -o - | \ ++// RUN: FileCheck %s --check-prefix=IR-LSX ++// RUN: %clang --target=loongarch64 -mno-lsx -S -emit-llvm %s -o - | \ ++// RUN: FileCheck %s --check-prefix=IR-NOLSX ++// RUN: %clang --target=loongarch64 -mlsx -mno-lsx -S -emit-llvm %s -o - | \ ++// RUN: FileCheck %s --check-prefix=IR-NOLSX ++// RUN: %clang --target=loongarch64 -mno-lsx -mlsx -S -emit-llvm %s -o - | \ ++// RUN: FileCheck %s --check-prefix=IR-LSX ++// RUN: %clang --target=loongarch64 -mlsx -mno-lasx -S -emit-llvm %s -o - | \ ++// RUN: FileCheck %s --check-prefix=IR-LSX ++// RUN: %clang --target=loongarch64 -mno-lasx -mlsx -S -emit-llvm %s -o - | \ ++// RUN: FileCheck %s --check-prefix=IR-LSX ++// RUN: %clang --target=loongarch64 -mno-lsx -mno-lasx -S -emit-llvm %s -o - | \ ++// RUN: FileCheck %s --check-prefix=IR-NOLSX ++ ++// CC1-LSX: "-target-feature" "+lsx" ++// CC1-NOLSX: "-target-feature" "-lsx" ++ ++// IR-LSX: attributes ## ={{.*}}"target-features"="{{(.*,)?}}+lsx{{(,.*)?}}" ++// IR-NOLSX: attributes ## ={{.*}}"target-features"="{{(.*,)?}}-lsx{{(,.*)?}}" ++ ++int foo(void){ ++ return 3; ++} +diff --git a/clang/test/Preprocessor/init-loongarch.c b/clang/test/Preprocessor/init-loongarch.c +index 4ef42a921ec0..e235a7283021 100644 +--- a/clang/test/Preprocessor/init-loongarch.c ++++ b/clang/test/Preprocessor/init-loongarch.c +@@ -807,3 +807,38 @@ + + // ARCH-TUNE: #define __loongarch_arch "ARCH" + // ARCH-TUNE: #define __loongarch_tune "TUNE" ++ ++// RUN: %clang --target=loongarch64 -mlsx -x c -E -dM %s -o - \ ++// RUN: | FileCheck --match-full-lines --check-prefix=MLSX %s ++// RUN: %clang --target=loongarch64 -mno-lsx -mlsx -x c -E -dM %s -o - \ ++// RUN: | FileCheck --match-full-lines --check-prefix=MLSX %s ++// RUN: %clang --target=loongarch64 -mlsx -mno-lasx -x c -E -dM %s -o - \ ++// RUN: | FileCheck --match-full-lines --check-prefix=MLSX %s ++// RUN: %clang --target=loongarch64 -mno-lasx -mlsx -x c -E -dM %s -o - \ ++// RUN: | FileCheck --match-full-lines --check-prefix=MLSX %s ++// MLSX-NOT: #define __loongarch_asx ++// MLSX: #define __loongarch_sx 1 ++ ++// RUN: %clang --target=loongarch64 -mlasx -x c -E -dM %s -o - \ ++// RUN: | FileCheck --match-full-lines --check-prefix=MLASX %s ++// RUN: %clang --target=loongarch64 -mno-lasx -mlasx -x c -E -dM %s -o - \ ++// RUN: | FileCheck --match-full-lines --check-prefix=MLASX %s ++// RUN: %clang --target=loongarch64 -mlsx -mlasx -x c -E -dM %s -o - \ ++// RUN: | FileCheck --match-full-lines --check-prefix=MLASX %s ++// RUN: %clang --target=loongarch64 -mlasx -mlsx -x c -E -dM %s -o - \ ++// RUN: | FileCheck 
--match-full-lines --check-prefix=MLASX %s ++// MLASX: #define __loongarch_asx 1 ++// MLASX: #define __loongarch_sx 1 ++ ++// RUN: %clang --target=loongarch64 -mno-lsx -x c -E -dM %s -o - \ ++// RUN: | FileCheck --match-full-lines --check-prefix=MNO-LSX %s ++// RUN: %clang --target=loongarch64 -mlsx -mno-lsx -x c -E -dM %s -o - \ ++// RUN: | FileCheck --match-full-lines --check-prefix=MNO-LSX %s ++// RUN: %clang --target=loongarch64 -mno-lsx -mno-lasx -x c -E -dM %s -o - \ ++// RUN: | FileCheck --match-full-lines --check-prefix=MNO-LSX %s ++// RUN: %clang --target=loongarch64 -mno-lasx -mno-lsx -x c -E -dM %s -o - \ ++// RUN: | FileCheck --match-full-lines --check-prefix=MNO-LSX %s ++// RUN: %clang --target=loongarch64 -mno-lasx -x c -E -dM %s -o - \ ++// RUN: | FileCheck --match-full-lines --check-prefix=MNO-LSX %s ++// MNO-LSX-NOT: #define __loongarch_asx ++// MNO-LSX-NOT: #define __loongarch_sx +-- +2.20.1 + + +From 142597c7df9a7e6debcaffede2057eb0a7c3b33b Mon Sep 17 00:00:00 2001 +From: licongtian <licongtian@loongson.cn> +Date: Wed, 25 Oct 2023 17:35:32 +0800 +Subject: PATCH 3/8 ClangLoongArch Add ABI implementation of passing + vectors + +(cherry picked from commit eb49b86f5a9b54b0e3c37024334a3c6f6ca88e14) +--- + clang/lib/CodeGen/Targets/LoongArch.cpp | 7 +++++++ + 1 file changed, 7 insertions(+) + +diff --git a/clang/lib/CodeGen/Targets/LoongArch.cpp b/clang/lib/CodeGen/Targets/LoongArch.cpp +index 7483bf6d6d1e..26c68c3583b2 100644 +--- a/clang/lib/CodeGen/Targets/LoongArch.cpp ++++ b/clang/lib/CodeGen/Targets/LoongArch.cpp +@@ -321,6 +321,13 @@ ABIArgInfo LoongArchABIInfo::classifyArgumentType(QualType Ty, bool IsFixed, + return ABIArgInfo::getDirect(); + } + ++ // Pass 128-bit/256-bit vector values via vector registers directly. ++ if (Ty->isVectorType() && (((getContext().getTypeSize(Ty) == 128) && ++ (getTarget().hasFeature("lsx"))) || ++ ((getContext().getTypeSize(Ty) == 256) && ++ getTarget().hasFeature("lasx")))) ++ return ABIArgInfo::getDirect(); ++ + // Complex types for the *f or *d ABI must be passed directly rather than + // using CoerceAndExpand. 
+ if (IsFixed && Ty->isComplexType() && FRLen && FARsLeft >= 2) { +-- +2.20.1 + + +From f2a409f5da055510125cae5fcf84a6b7d255e64a Mon Sep 17 00:00:00 2001 +From: licongtian <licongtian@loongson.cn> +Date: Wed, 25 Oct 2023 17:41:03 +0800 +Subject: PATCH 4/8 ClangLoongArch Support the builtin functions for LSX + +This patch does the following work: +- Define the builtin functions for LSX +- Add the header file lsxintrin.h +- Add the immediate number range checking for LSX builtins + +(cherry picked from commit d6bfa3341181a80de6c8aede807fc1acc3ce8d9b) +--- + .../include/clang/Basic/BuiltinsLoongArch.def | 43 +- + .../clang/Basic/BuiltinsLoongArchBase.def | 53 + + .../clang/Basic/BuiltinsLoongArchLSX.def | 953 +++++ + clang/lib/Headers/CMakeLists.txt | 1 + + clang/lib/Headers/lsxintrin.h | 3726 +++++++++++++++++ + clang/lib/Sema/SemaChecking.cpp | 229 +- + 6 files changed, 4965 insertions(+), 40 deletions(-) + create mode 100644 clang/include/clang/Basic/BuiltinsLoongArchBase.def + create mode 100644 clang/include/clang/Basic/BuiltinsLoongArchLSX.def + create mode 100644 clang/lib/Headers/lsxintrin.h + +diff --git a/clang/include/clang/Basic/BuiltinsLoongArch.def b/clang/include/clang/Basic/BuiltinsLoongArch.def +index 20510e18fe58..9ec19c31095a 100644 +--- a/clang/include/clang/Basic/BuiltinsLoongArch.def ++++ b/clang/include/clang/Basic/BuiltinsLoongArch.def +@@ -15,46 +15,11 @@ + # define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BUILTIN(ID, TYPE, ATTRS) + #endif + +-// TODO: Support more builtins. +-TARGET_BUILTIN(__builtin_loongarch_cacop_d, "vWiUWiWi", "nc", "64bit") +-TARGET_BUILTIN(__builtin_loongarch_cacop_w, "viUii", "nc", "32bit") +-TARGET_BUILTIN(__builtin_loongarch_dbar, "vIUi", "nc", "") +-TARGET_BUILTIN(__builtin_loongarch_ibar, "vIUi", "nc", "") +-TARGET_BUILTIN(__builtin_loongarch_movfcsr2gr, "UiIUi", "nc", "f") +-TARGET_BUILTIN(__builtin_loongarch_movgr2fcsr, "vIUiUi", "nc", "f") +-TARGET_BUILTIN(__builtin_loongarch_break, "vIUi", "nc", "") +-TARGET_BUILTIN(__builtin_loongarch_syscall, "vIUi", "nc", "") +-TARGET_BUILTIN(__builtin_loongarch_cpucfg, "UiUi", "nc", "") +-TARGET_BUILTIN(__builtin_loongarch_asrtle_d, "vWiWi", "nc", "64bit") +-TARGET_BUILTIN(__builtin_loongarch_asrtgt_d, "vWiWi", "nc", "64bit") ++// Definition of LoongArch basic builtins. 
++#include "clang/Basic/BuiltinsLoongArchBase.def" + +-TARGET_BUILTIN(__builtin_loongarch_crc_w_b_w, "iii", "nc", "64bit") +-TARGET_BUILTIN(__builtin_loongarch_crc_w_h_w, "iii", "nc", "64bit") +-TARGET_BUILTIN(__builtin_loongarch_crc_w_w_w, "iii", "nc", "64bit") +-TARGET_BUILTIN(__builtin_loongarch_crc_w_d_w, "iWii", "nc", "64bit") +-TARGET_BUILTIN(__builtin_loongarch_crcc_w_b_w, "iii", "nc", "64bit") +-TARGET_BUILTIN(__builtin_loongarch_crcc_w_h_w, "iii", "nc", "64bit") +-TARGET_BUILTIN(__builtin_loongarch_crcc_w_w_w, "iii", "nc", "64bit") +-TARGET_BUILTIN(__builtin_loongarch_crcc_w_d_w, "iWii", "nc", "64bit") +- +-TARGET_BUILTIN(__builtin_loongarch_csrrd_w, "UiIUi", "nc", "") +-TARGET_BUILTIN(__builtin_loongarch_csrrd_d, "UWiIUi", "nc", "64bit") +-TARGET_BUILTIN(__builtin_loongarch_csrwr_w, "UiUiIUi", "nc", "") +-TARGET_BUILTIN(__builtin_loongarch_csrwr_d, "UWiUWiIUi", "nc", "64bit") +-TARGET_BUILTIN(__builtin_loongarch_csrxchg_w, "UiUiUiIUi", "nc", "") +-TARGET_BUILTIN(__builtin_loongarch_csrxchg_d, "UWiUWiUWiIUi", "nc", "64bit") +- +-TARGET_BUILTIN(__builtin_loongarch_iocsrrd_b, "UiUi", "nc", "") +-TARGET_BUILTIN(__builtin_loongarch_iocsrrd_h, "UiUi", "nc", "") +-TARGET_BUILTIN(__builtin_loongarch_iocsrrd_w, "UiUi", "nc", "") +-TARGET_BUILTIN(__builtin_loongarch_iocsrrd_d, "UWiUi", "nc", "64bit") +-TARGET_BUILTIN(__builtin_loongarch_iocsrwr_b, "vUiUi", "nc", "") +-TARGET_BUILTIN(__builtin_loongarch_iocsrwr_h, "vUiUi", "nc", "") +-TARGET_BUILTIN(__builtin_loongarch_iocsrwr_w, "vUiUi", "nc", "") +-TARGET_BUILTIN(__builtin_loongarch_iocsrwr_d, "vUWiUi", "nc", "64bit") +- +-TARGET_BUILTIN(__builtin_loongarch_lddir_d, "WiWiIUWi", "nc", "64bit") +-TARGET_BUILTIN(__builtin_loongarch_ldpte_d, "vWiIUWi", "nc", "64bit") ++// Definition of LSX builtins. ++#include "clang/Basic/BuiltinsLoongArchLSX.def" + + #undef BUILTIN + #undef TARGET_BUILTIN +diff --git a/clang/include/clang/Basic/BuiltinsLoongArchBase.def b/clang/include/clang/Basic/BuiltinsLoongArchBase.def +new file mode 100644 +index 000000000000..cbb239223aae +--- /dev/null ++++ b/clang/include/clang/Basic/BuiltinsLoongArchBase.def +@@ -0,0 +1,53 @@ ++//============------------ BuiltinsLoongArchBase.def -------------*- C++ -*-==// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file defines the LoongArch-specific basic builtin function database. ++// Users of this file must define the BUILTIN macro to make use of this ++// information. 
++// ++//===----------------------------------------------------------------------===// ++ ++TARGET_BUILTIN(__builtin_loongarch_cacop_d, "vWiUWiWi", "nc", "64bit") ++TARGET_BUILTIN(__builtin_loongarch_cacop_w, "viUii", "nc", "32bit") ++TARGET_BUILTIN(__builtin_loongarch_dbar, "vIUi", "nc", "") ++TARGET_BUILTIN(__builtin_loongarch_ibar, "vIUi", "nc", "") ++TARGET_BUILTIN(__builtin_loongarch_movfcsr2gr, "UiIUi", "nc", "f") ++TARGET_BUILTIN(__builtin_loongarch_movgr2fcsr, "vIUiUi", "nc", "f") ++TARGET_BUILTIN(__builtin_loongarch_break, "vIUi", "nc", "") ++TARGET_BUILTIN(__builtin_loongarch_syscall, "vIUi", "nc", "") ++TARGET_BUILTIN(__builtin_loongarch_cpucfg, "UiUi", "nc", "") ++TARGET_BUILTIN(__builtin_loongarch_asrtle_d, "vWiWi", "nc", "64bit") ++TARGET_BUILTIN(__builtin_loongarch_asrtgt_d, "vWiWi", "nc", "64bit") ++ ++TARGET_BUILTIN(__builtin_loongarch_crc_w_b_w, "iii", "nc", "64bit") ++TARGET_BUILTIN(__builtin_loongarch_crc_w_h_w, "iii", "nc", "64bit") ++TARGET_BUILTIN(__builtin_loongarch_crc_w_w_w, "iii", "nc", "64bit") ++TARGET_BUILTIN(__builtin_loongarch_crc_w_d_w, "iWii", "nc", "64bit") ++TARGET_BUILTIN(__builtin_loongarch_crcc_w_b_w, "iii", "nc", "64bit") ++TARGET_BUILTIN(__builtin_loongarch_crcc_w_h_w, "iii", "nc", "64bit") ++TARGET_BUILTIN(__builtin_loongarch_crcc_w_w_w, "iii", "nc", "64bit") ++TARGET_BUILTIN(__builtin_loongarch_crcc_w_d_w, "iWii", "nc", "64bit") ++ ++TARGET_BUILTIN(__builtin_loongarch_csrrd_w, "UiIUi", "nc", "") ++TARGET_BUILTIN(__builtin_loongarch_csrrd_d, "UWiIUi", "nc", "64bit") ++TARGET_BUILTIN(__builtin_loongarch_csrwr_w, "UiUiIUi", "nc", "") ++TARGET_BUILTIN(__builtin_loongarch_csrwr_d, "UWiUWiIUi", "nc", "64bit") ++TARGET_BUILTIN(__builtin_loongarch_csrxchg_w, "UiUiUiIUi", "nc", "") ++TARGET_BUILTIN(__builtin_loongarch_csrxchg_d, "UWiUWiUWiIUi", "nc", "64bit") ++ ++TARGET_BUILTIN(__builtin_loongarch_iocsrrd_b, "UiUi", "nc", "") ++TARGET_BUILTIN(__builtin_loongarch_iocsrrd_h, "UiUi", "nc", "") ++TARGET_BUILTIN(__builtin_loongarch_iocsrrd_w, "UiUi", "nc", "") ++TARGET_BUILTIN(__builtin_loongarch_iocsrrd_d, "UWiUi", "nc", "64bit") ++TARGET_BUILTIN(__builtin_loongarch_iocsrwr_b, "vUiUi", "nc", "") ++TARGET_BUILTIN(__builtin_loongarch_iocsrwr_h, "vUiUi", "nc", "") ++TARGET_BUILTIN(__builtin_loongarch_iocsrwr_w, "vUiUi", "nc", "") ++TARGET_BUILTIN(__builtin_loongarch_iocsrwr_d, "vUWiUi", "nc", "64bit") ++ ++TARGET_BUILTIN(__builtin_loongarch_lddir_d, "WiWiIUWi", "nc", "64bit") ++TARGET_BUILTIN(__builtin_loongarch_ldpte_d, "vWiIUWi", "nc", "64bit") +diff --git a/clang/include/clang/Basic/BuiltinsLoongArchLSX.def b/clang/include/clang/Basic/BuiltinsLoongArchLSX.def +new file mode 100644 +index 000000000000..8e6aec886c50 +--- /dev/null ++++ b/clang/include/clang/Basic/BuiltinsLoongArchLSX.def +@@ -0,0 +1,953 @@ ++//=============------------- BuiltinsLoongArchLSX.def --------------- C++ -*-=// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file defines the LoongArch-specific LSX builtin function database. ++// Users of this file must define the BUILTIN macro to make use of this ++// information. 
++// ++//===----------------------------------------------------------------------===// ++ ++TARGET_BUILTIN(__builtin_lsx_vadd_b, "V16cV16cV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vadd_h, "V8sV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vadd_w, "V4iV4iV4i", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vadd_d, "V2LLiV2LLiV2LLi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vadd_q, "V2LLiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsub_b, "V16cV16cV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsub_h, "V8sV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsub_w, "V4iV4iV4i", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsub_d, "V2LLiV2LLiV2LLi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsub_q, "V2LLiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vaddi_bu, "V16cV16cIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vaddi_hu, "V8sV8sIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vaddi_wu, "V4iV4iIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vaddi_du, "V2LLiV2LLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsubi_bu, "V16cV16cIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsubi_hu, "V8sV8sIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsubi_wu, "V4iV4iIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsubi_du, "V2LLiV2LLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vneg_b, "V16cV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vneg_h, "V8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vneg_w, "V4iV4i", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vneg_d, "V2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsadd_b, "V16ScV16ScV16Sc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsadd_h, "V8SsV8SsV8Ss", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsadd_w, "V4SiV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsadd_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsadd_bu, "V16UcV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsadd_hu, "V8UsV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsadd_wu, "V4UiV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsadd_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vssub_b, "V16ScV16ScV16Sc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssub_h, "V8SsV8SsV8Ss", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssub_w, "V4SiV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssub_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vssub_bu, "V16UcV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssub_hu, "V8UsV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssub_wu, "V4UiV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssub_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vhaddw_h_b, "V8SsV16ScV16Sc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vhaddw_w_h, "V4SiV8SsV8Ss", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vhaddw_d_w, "V2SLLiV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vhaddw_q_d, "V2LLiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vhaddw_hu_bu, "V8UsV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vhaddw_wu_hu, "V4UiV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vhaddw_du_wu, "V2ULLiV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vhaddw_qu_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vhsubw_h_b, "V8SsV16ScV16Sc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vhsubw_w_h, "V4SiV8SsV8Ss", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vhsubw_d_w, 
"V2SLLiV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vhsubw_q_d, "V2LLiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vhsubw_hu_bu, "V8UsV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vhsubw_wu_hu, "V4UiV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vhsubw_du_wu, "V2ULLiV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vhsubw_qu_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vaddwev_h_b, "V8sV16cV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vaddwev_w_h, "V4SiV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vaddwev_d_w, "V2LLiV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vaddwev_q_d, "V2LLiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vaddwod_h_b, "V8sV16cV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vaddwod_w_h, "V4SiV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vaddwod_d_w, "V2LLiV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vaddwod_q_d, "V2LLiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsubwev_h_b, "V8sV16cV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsubwev_w_h, "V4SiV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsubwev_d_w, "V2LLiV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsubwev_q_d, "V2LLiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsubwod_h_b, "V8sV16cV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsubwod_w_h, "V4SiV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsubwod_d_w, "V2LLiV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsubwod_q_d, "V2LLiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vaddwev_h_bu, "V8sV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vaddwev_w_hu, "V4SiV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vaddwev_d_wu, "V2LLiV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vaddwev_q_du, "V2LLiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vaddwod_h_bu, "V8sV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vaddwod_w_hu, "V4SiV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vaddwod_d_wu, "V2LLiV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vaddwod_q_du, "V2LLiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsubwev_h_bu, "V8sV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsubwev_w_hu, "V4SiV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsubwev_d_wu, "V2LLiV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsubwev_q_du, "V2LLiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsubwod_h_bu, "V8sV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsubwod_w_hu, "V4SiV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsubwod_d_wu, "V2LLiV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsubwod_q_du, "V2LLiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vaddwev_h_bu_b, "V8sV16UcV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vaddwev_w_hu_h, "V4SiV8UsV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vaddwev_d_wu_w, "V2LLiV4UiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vaddwev_q_du_d, "V2LLiV2ULLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vaddwod_h_bu_b, "V8sV16UcV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vaddwod_w_hu_h, "V4SiV8UsV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vaddwod_d_wu_w, "V2LLiV4UiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vaddwod_q_du_d, "V2LLiV2ULLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vavg_b, "V16ScV16ScV16Sc", "nc", "lsx") 
++TARGET_BUILTIN(__builtin_lsx_vavg_h, "V8SsV8SsV8Ss", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vavg_w, "V4SiV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vavg_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vavg_bu, "V16UcV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vavg_hu, "V8UsV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vavg_wu, "V4UiV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vavg_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vavgr_b, "V16ScV16ScV16Sc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vavgr_h, "V8SsV8SsV8Ss", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vavgr_w, "V4SiV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vavgr_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vavgr_bu, "V16UcV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vavgr_hu, "V8UsV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vavgr_wu, "V4UiV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vavgr_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vabsd_b, "V16ScV16ScV16Sc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vabsd_h, "V8SsV8SsV8Ss", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vabsd_w, "V4SiV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vabsd_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vabsd_bu, "V16UcV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vabsd_hu, "V8UsV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vabsd_wu, "V4UiV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vabsd_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vadda_b, "V16ScV16ScV16Sc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vadda_h, "V8SsV8SsV8Ss", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vadda_w, "V4SiV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vadda_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vmax_b, "V16ScV16ScV16Sc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmax_h, "V8SsV8SsV8Ss", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmax_w, "V4SiV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmax_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vmaxi_b, "V16ScV16ScIi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmaxi_h, "V8SsV8SsIi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmaxi_w, "V4SiV4SiIi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmaxi_d, "V2SLLiV2SLLiIi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vmax_bu, "V16UcV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmax_hu, "V8UsV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmax_wu, "V4UiV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmax_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vmaxi_bu, "V16UcV16UcIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmaxi_hu, "V8UsV8UsIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmaxi_wu, "V4UiV4UiIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmaxi_du, "V2ULLiV2ULLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vmin_b, "V16ScV16ScV16Sc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmin_h, "V8SsV8SsV8Ss", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmin_w, "V4SiV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmin_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vmini_b, "V16ScV16ScIi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmini_h, "V8SsV8SsIi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmini_w, 
"V4SiV4SiIi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmini_d, "V2SLLiV2SLLiIi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vmin_bu, "V16UcV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmin_hu, "V8UsV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmin_wu, "V4UiV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmin_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vmini_bu, "V16UcV16UcIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmini_hu, "V8UsV8UsIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmini_wu, "V4UiV4UiIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmini_du, "V2ULLiV2ULLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vmul_b, "V16ScV16ScV16Sc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmul_h, "V8SsV8SsV8Ss", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmul_w, "V4SiV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmul_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vmuh_b, "V16cV16cV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmuh_h, "V8sV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmuh_w, "V4iV4iV4i", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmuh_d, "V2LLiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vmuh_bu, "V16UcV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmuh_hu, "V8UsV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmuh_wu, "V4UiV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmuh_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vmulwev_h_b, "V8sV16cV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmulwev_w_h, "V4SiV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmulwev_d_w, "V2LLiV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmulwev_q_d, "V2LLiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vmulwod_h_b, "V8sV16cV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmulwod_w_h, "V4SiV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmulwod_d_w, "V2LLiV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmulwod_q_d, "V2LLiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vmulwev_h_bu, "V8sV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmulwev_w_hu, "V4SiV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmulwev_d_wu, "V2LLiV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmulwev_q_du, "V2LLiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vmulwod_h_bu, "V8sV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmulwod_w_hu, "V4SiV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmulwod_d_wu, "V2LLiV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmulwod_q_du, "V2LLiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vmulwev_h_bu_b, "V8sV16UcV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmulwev_w_hu_h, "V4SiV8UsV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmulwev_d_wu_w, "V2LLiV4UiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmulwev_q_du_d, "V2LLiV2ULLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vmulwod_h_bu_b, "V8sV16UcV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmulwod_w_hu_h, "V4SiV8UsV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmulwod_d_wu_w, "V2LLiV4UiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmulwod_q_du_d, "V2LLiV2ULLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vmadd_b, "V16ScV16ScV16ScV16Sc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmadd_h, "V8SsV8SsV8SsV8Ss", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmadd_w, "V4SiV4SiV4SiV4Si", 
"nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmadd_d, "V2SLLiV2SLLiV2SLLiV2SLLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vmsub_b, "V16ScV16ScV16ScV16Sc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmsub_h, "V8SsV8SsV8SsV8Ss", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmsub_w, "V4SiV4SiV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmsub_d, "V2SLLiV2SLLiV2SLLiV2SLLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vmaddwev_h_b, "V8sV8sV16cV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmaddwev_w_h, "V4SiV4SiV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmaddwev_d_w, "V2LLiV2LLiV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmaddwev_q_d, "V2LLiV2LLiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vmaddwod_h_b, "V8sV8sV16cV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmaddwod_w_h, "V4SiV4SiV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmaddwod_d_w, "V2LLiV2LLiV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmaddwod_q_d, "V2LLiV2LLiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vmaddwev_h_bu, "V8UsV8UsV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmaddwev_w_hu, "V4UiV4UiV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmaddwev_d_wu, "V2ULLiV2ULLiV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmaddwev_q_du, "V2ULLiV2ULLiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vmaddwod_h_bu, "V8UsV8UsV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmaddwod_w_hu, "V4UiV4UiV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmaddwod_d_wu, "V2ULLiV2ULLiV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmaddwod_q_du, "V2ULLiV2ULLiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vmaddwev_h_bu_b, "V8sV8sV16UcV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmaddwev_w_hu_h, "V4SiV4SiV8UsV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmaddwev_d_wu_w, "V2LLiV2LLiV4UiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmaddwev_q_du_d, "V2LLiV2LLiV2ULLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vmaddwod_h_bu_b, "V8sV8sV16UcV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmaddwod_w_hu_h, "V4SiV4SiV8UsV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmaddwod_d_wu_w, "V2LLiV2LLiV4UiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmaddwod_q_du_d, "V2LLiV2LLiV2ULLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vdiv_b, "V16ScV16ScV16Sc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vdiv_h, "V8SsV8SsV8Ss", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vdiv_w, "V4SiV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vdiv_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vdiv_bu, "V16UcV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vdiv_hu, "V8UsV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vdiv_wu, "V4UiV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vdiv_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vmod_b, "V16ScV16ScV16Sc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmod_h, "V8SsV8SsV8Ss", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmod_w, "V4SiV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmod_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") ++ ++ ++TARGET_BUILTIN(__builtin_lsx_vmod_bu, "V16UcV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmod_hu, "V8UsV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmod_wu, "V4UiV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmod_du, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsat_b, 
"V16ScV16ScIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsat_h, "V8SsV8SsIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsat_w, "V4SiV4SiIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsat_d, "V2SLLiV2SLLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsat_bu, "V16UcV16UcIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsat_hu, "V8UsV8UsIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsat_wu, "V4UiV4UiIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsat_du, "V2ULLiV2ULLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vexth_h_b, "V8sV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vexth_w_h, "V4SiV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vexth_d_w, "V2LLiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vexth_q_d, "V2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vexth_hu_bu, "V8UsV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vexth_wu_hu, "V4UiV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vexth_du_wu, "V2ULLiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vexth_qu_du, "V2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsigncov_b, "V16ScV16ScV16Sc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsigncov_h, "V8SsV8SsV8Ss", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsigncov_w, "V4SiV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsigncov_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vmskltz_b, "V16cV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmskltz_h, "V8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmskltz_w, "V4iV4i", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmskltz_d, "V2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vmskgez_b, "V16cV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vmsknz_b, "V8sV8s", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vldi, "V2LLiIi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vrepli_b, "V16cIi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vrepli_h, "V8sIi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vrepli_w, "V4iIi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vrepli_d, "V2LLiIi", "nc", "lsx") ++ ++ ++TARGET_BUILTIN(__builtin_lsx_vand_v, "V16UcV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vor_v, "V16UcV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vxor_v, "V16cV16cV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vnor_v, "V16UcV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vandn_v, "V16UcV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vorn_v, "V16ScV16ScV16Sc", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vandi_b, "V16UcV16UcIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vori_b, "V16UcV16UcIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vxori_b, "V16UcV16UcIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vnori_b, "V16UcV16UcIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsll_b, "V16cV16cV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsll_h, "V8sV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsll_w, "V4iV4iV4i", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsll_d, "V2LLiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vslli_b, "V16cV16cIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vslli_h, "V8sV8sIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vslli_w, "V4iV4iIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vslli_d, "V2LLiV2LLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsrl_b, "V16cV16cV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrl_h, "V8sV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrl_w, "V4iV4iV4i", "nc", "lsx") 
++TARGET_BUILTIN(__builtin_lsx_vsrl_d, "V2LLiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsrli_b, "V16cV16cIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrli_h, "V8sV8sIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrli_w, "V4iV4iIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrli_d, "V2LLiV2LLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsra_b, "V16cV16cV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsra_h, "V8sV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsra_w, "V4iV4iV4i", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsra_d, "V2LLiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsrai_b, "V16cV16cIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrai_h, "V8sV8sIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrai_w, "V4iV4iIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrai_d, "V2LLiV2LLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vrotr_b, "V16cV16cV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vrotr_h, "V8sV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vrotr_w, "V4iV4iV4i", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vrotr_d, "V2LLiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vrotri_b, "V16cV16cIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vrotri_h, "V8sV8sIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vrotri_w, "V4iV4iIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vrotri_d, "V2LLiV2LLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsllwil_h_b, "V8sV16cIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsllwil_w_h, "V4SiV8sIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsllwil_d_w, "V2LLiV4SiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vextl_q_d, "V2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsllwil_hu_bu, "V8UsV16UcIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsllwil_wu_hu, "V4UiV8UsIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsllwil_du_wu, "V2ULLiV4UiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vextl_qu_du, "V2LLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsrlr_b, "V16cV16cV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrlr_h, "V8sV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrlr_w, "V4iV4iV4i", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrlr_d, "V2LLiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsrlri_b, "V16cV16cIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrlri_h, "V8sV8sIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrlri_w, "V4iV4iIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrlri_d, "V2LLiV2LLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsrar_b, "V16cV16cV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrar_h, "V8sV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrar_w, "V4iV4iV4i", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrar_d, "V2LLiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsrari_b, "V16cV16cIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrari_h, "V8sV8sIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrari_w, "V4iV4iIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrari_d, "V2LLiV2LLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsrln_b_h, "V16ScV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrln_h_w, "V8sV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrln_w_d, "V4SiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsran_b_h, "V16ScV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsran_h_w, "V8sV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsran_w_d, 
"V4SiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsrlni_b_h, "V16cV16cV16cIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrlni_h_w, "V8sV8sV8sIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrlni_w_d, "V4iV4iV4iIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrlni_d_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsrani_b_h, "V16cV16cV16cIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrani_h_w, "V8sV8sV8sIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrani_w_d, "V4iV4iV4iIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrani_d_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsrlrn_b_h, "V16ScV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrlrn_h_w, "V8sV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrlrn_w_d, "V4SiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsrarn_b_h, "V16ScV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrarn_h_w, "V8sV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrarn_w_d, "V4SiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsrlrni_b_h, "V16cV16cV16cIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrlrni_h_w, "V8sV8sV8sIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrlrni_w_d, "V4iV4iV4iIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrlrni_d_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsrarni_b_h, "V16cV16cV16cIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrarni_h_w, "V8sV8sV8sIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrarni_w_d, "V4iV4iV4iIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsrarni_d_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vssrln_b_h, "V16ScV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrln_h_w, "V8sV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrln_w_d, "V4SiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vssran_b_h, "V16ScV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssran_h_w, "V8sV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssran_w_d, "V4SiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vssrln_bu_h, "V16UcV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrln_hu_w, "V8UsV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrln_wu_d, "V4UiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vssran_bu_h, "V16UcV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssran_hu_w, "V8UsV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssran_wu_d, "V4UiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vssrlni_b_h, "V16cV16cV16cIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrlni_h_w, "V8sV8sV8sIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrlni_w_d, "V4iV4iV4iIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrlni_d_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vssrani_b_h, "V16cV16cV16cIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrani_h_w, "V8sV8sV8sIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrani_w_d, "V4iV4iV4iIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrani_d_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vssrlrni_bu_h, "V16cV16cV16cIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrlrni_hu_w, "V8sV8sV8sIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrlrni_wu_d, "V4iV4iV4iIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrlrni_du_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vssrani_bu_h, 
"V16cV16cV16cIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrani_hu_w, "V8sV8sV8sIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrani_wu_d, "V4iV4iV4iIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrani_du_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vssrlrn_b_h, "V16ScV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrlrn_h_w, "V8sV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrlrn_w_d, "V4SiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vssrarn_b_h, "V16ScV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrarn_h_w, "V8sV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrarn_w_d, "V4SiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vssrlrn_bu_h, "V16UcV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrlrn_hu_w, "V8UsV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrlrn_wu_d, "V4UiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vssrarn_bu_h, "V16UcV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrarn_hu_w, "V8UsV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrarn_wu_d, "V4UiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vssrlrni_b_h, "V16cV16cV16cIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrlrni_h_w, "V8sV8sV8sIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrlrni_w_d, "V4iV4iV4iIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrlrni_d_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vssrarni_b_h, "V16cV16cV16cIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrarni_h_w, "V8sV8sV8sIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrarni_w_d, "V4iV4iV4iIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrarni_d_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vssrlni_bu_h, "V16cV16cV16cIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrlni_hu_w, "V8sV8sV8sIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrlni_wu_d, "V4iV4iV4iIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrlni_du_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vssrarni_bu_h, "V16cV16cV16cIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrarni_hu_w, "V8sV8sV8sIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrarni_wu_d, "V4iV4iV4iIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vssrarni_du_q, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vclo_b, "V16ScV16Sc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vclo_h, "V8SsV8Ss", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vclo_w, "V4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vclo_d, "V2SLLiV2SLLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vclz_b, "V16ScV16Sc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vclz_h, "V8SsV8Ss", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vclz_w, "V4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vclz_d, "V2SLLiV2SLLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vpcnt_b, "V16ScV16Sc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vpcnt_h, "V8SsV8Ss", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vpcnt_w, "V4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vpcnt_d, "V2SLLiV2SLLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vbitclr_b, "V16UcV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vbitclr_h, "V8UsV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vbitclr_w, "V4UiV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vbitclr_d, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vbitclri_b, 
"V16UcV16UcIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vbitclri_h, "V8UsV8UsIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vbitclri_w, "V4UiV4UiIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vbitclri_d, "V2ULLiV2ULLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vbitset_b, "V16UcV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vbitset_h, "V8UsV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vbitset_w, "V4UiV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vbitset_d, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vbitseti_b, "V16UcV16UcIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vbitseti_h, "V8UsV8UsIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vbitseti_w, "V4UiV4UiIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vbitseti_d, "V2ULLiV2ULLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vbitrev_b, "V16UcV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vbitrev_h, "V8UsV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vbitrev_w, "V4UiV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vbitrev_d, "V2ULLiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vbitrevi_b, "V16UcV16UcIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vbitrevi_h, "V8UsV8UsIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vbitrevi_w, "V4UiV4UiIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vbitrevi_d, "V2ULLiV2ULLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfrstp_b, "V16ScV16ScV16ScV16Sc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfrstp_h, "V8SsV8SsV8SsV8Ss", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfrstpi_b, "V16cV16cV16cIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfrstpi_h, "V8sV8sV8sIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfadd_s, "V4fV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfadd_d, "V2dV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfsub_s, "V4fV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfsub_d, "V2dV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfmul_s, "V4fV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfmul_d, "V2dV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfdiv_s, "V4fV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfdiv_d, "V2dV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfmadd_s, "V4fV4fV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfmadd_d, "V2dV2dV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfmsub_s, "V4fV4fV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfmsub_d, "V2dV2dV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfnmadd_s, "V4fV4fV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfnmadd_d, "V2dV2dV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfnmsub_s, "V4fV4fV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfnmsub_d, "V2dV2dV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfmax_s, "V4fV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfmax_d, "V2dV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfmin_s, "V4fV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfmin_d, "V2dV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfmaxa_s, "V4fV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfmaxa_d, "V2dV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfmina_s, "V4fV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfmina_d, "V2dV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vflogb_s, "V4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vflogb_d, "V2dV2d", "nc", "lsx") ++ 
++TARGET_BUILTIN(__builtin_lsx_vfclass_s, "V4iV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfclass_d, "V2LLiV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfsqrt_s, "V4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfsqrt_d, "V2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfrecip_s, "V4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfrecip_d, "V2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfrsqrt_s, "V4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfrsqrt_d, "V2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfcvtl_s_h, "V4fV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfcvtl_d_s, "V2dV4f", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfcvth_s_h, "V4fV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfcvth_d_s, "V2dV4f", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfcvt_h_s, "V8sV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfcvt_s_d, "V4fV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfrintrne_s, "V4SiV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfrintrne_d, "V2LLiV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfrintrz_s, "V4SiV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfrintrz_d, "V2LLiV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfrintrp_s, "V4SiV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfrintrp_d, "V2LLiV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfrintrm_s, "V4SiV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfrintrm_d, "V2LLiV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfrint_s, "V4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfrint_d, "V2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vftintrne_w_s, "V4SiV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vftintrne_l_d, "V2LLiV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vftintrz_w_s, "V4SiV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vftintrz_l_d, "V2LLiV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vftintrp_w_s, "V4SiV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vftintrp_l_d, "V2LLiV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vftintrm_w_s, "V4SiV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vftintrm_l_d, "V2LLiV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vftint_w_s, "V4SiV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vftint_l_d, "V2SLLiV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vftintrz_wu_s, "V4UiV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vftintrz_lu_d, "V2ULLiV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vftint_wu_s, "V4UiV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vftint_lu_d, "V2ULLiV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vftintrne_w_d, "V4SiV2dV2d", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vftintrz_w_d, "V4SiV2dV2d", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vftintrp_w_d, "V4SiV2dV2d", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vftintrm_w_d, "V4SiV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vftint_w_d, "V4SiV2dV2d", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vftintrnel_l_s, "V2LLiV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vftintrneh_l_s, "V2LLiV4f", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vftintrzl_l_s, "V2LLiV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vftintrzh_l_s, "V2LLiV4f", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vftintrpl_l_s, "V2LLiV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vftintrph_l_s, "V2LLiV4f", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vftintrml_l_s, "V2LLiV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vftintrmh_l_s, "V2LLiV4f", "nc", 
"lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vftintl_l_s, "V2LLiV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vftinth_l_s, "V2LLiV4f", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vffint_s_w, "V4fV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vffint_d_l, "V2dV2SLLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vffint_s_wu, "V4fV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vffint_d_lu, "V2dV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vffintl_d_w, "V2dV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vffinth_d_w, "V2dV4Si", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vffint_s_l, "V4fV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vseq_b, "V16ScV16ScV16Sc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vseq_h, "V8SsV8SsV8Ss", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vseq_w, "V4SiV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vseq_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vseqi_b, "V16ScV16ScISi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vseqi_h, "V8SsV8SsISi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vseqi_w, "V4SiV4SiISi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vseqi_d, "V2SLLiV2SLLiISi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsle_b, "V16ScV16ScV16Sc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsle_h, "V8SsV8SsV8Ss", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsle_w, "V4SiV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsle_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vslei_b, "V16ScV16ScISi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vslei_h, "V8SsV8SsISi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vslei_w, "V4SiV4SiISi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vslei_d, "V2SLLiV2SLLiISi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vsle_bu, "V16ScV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsle_hu, "V8SsV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsle_wu, "V4SiV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vsle_du, "V2SLLiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vslei_bu, "V16ScV16UcIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vslei_hu, "V8SsV8UsIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vslei_wu, "V4SiV4UiIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vslei_du, "V2SLLiV2ULLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vslt_b, "V16ScV16ScV16Sc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vslt_h, "V8SsV8SsV8Ss", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vslt_w, "V4SiV4SiV4Si", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vslt_d, "V2SLLiV2SLLiV2SLLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vslti_b, "V16ScV16ScISi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vslti_h, "V8SsV8SsISi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vslti_w, "V4SiV4SiISi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vslti_d, "V2SLLiV2SLLiISi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vslt_bu, "V16ScV16UcV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vslt_hu, "V8SsV8UsV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vslt_wu, "V4SiV4UiV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vslt_du, "V2SLLiV2ULLiV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vslti_bu, "V16ScV16UcIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vslti_hu, "V8SsV8UsIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vslti_wu, "V4SiV4UiIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vslti_du, "V2SLLiV2ULLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfcmp_caf_s, "V4SiV4fV4f", "nc", 
"lsx") ++TARGET_BUILTIN(__builtin_lsx_vfcmp_caf_d, "V2SLLiV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfcmp_cun_s, "V4SiV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfcmp_cun_d, "V2SLLiV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfcmp_ceq_s, "V4SiV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfcmp_ceq_d, "V2SLLiV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfcmp_cueq_s, "V4SiV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfcmp_cueq_d, "V2SLLiV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfcmp_clt_s, "V4SiV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfcmp_clt_d, "V2SLLiV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfcmp_cult_s, "V4SiV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfcmp_cult_d, "V2SLLiV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfcmp_cle_s, "V4SiV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfcmp_cle_d, "V2SLLiV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfcmp_cule_s, "V4SiV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfcmp_cule_d, "V2SLLiV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfcmp_cne_s, "V4SiV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfcmp_cne_d, "V2SLLiV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfcmp_cor_s, "V4SiV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfcmp_cor_d, "V2SLLiV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfcmp_cune_s, "V4SiV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfcmp_cune_d, "V2SLLiV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfcmp_saf_s, "V4SiV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfcmp_saf_d, "V2SLLiV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfcmp_sun_s, "V4SiV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfcmp_sun_d, "V2SLLiV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfcmp_seq_s, "V4SiV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfcmp_seq_d, "V2SLLiV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfcmp_sueq_s, "V4SiV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfcmp_sueq_d, "V2SLLiV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfcmp_slt_s, "V4SiV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfcmp_slt_d, "V2SLLiV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfcmp_sult_s, "V4SiV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfcmp_sult_d, "V2SLLiV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfcmp_sle_s, "V4SiV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfcmp_sle_d, "V2SLLiV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfcmp_sule_s, "V4SiV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfcmp_sule_d, "V2SLLiV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfcmp_sne_s, "V4SiV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfcmp_sne_d, "V2SLLiV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfcmp_sor_s, "V4SiV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfcmp_sor_d, "V2SLLiV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vfcmp_sune_s, "V4SiV4fV4f", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vfcmp_sune_d, "V2SLLiV2dV2d", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vbitsel_v, "V16UcV16UcV16UcV16Uc", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vbitseli_b, "V16UcV16UcV16UcIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vreplgr2vr_b, "V16Sci", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vreplgr2vr_h, "V8Ssi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vreplgr2vr_w, "V4Sii", "nc", "lsx") 
++TARGET_BUILTIN(__builtin_lsx_vreplgr2vr_d, "V2SLLiLLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vinsgr2vr_b, "V16ScV16SciIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vinsgr2vr_h, "V8SsV8SsiIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vinsgr2vr_w, "V4SiV4SiiIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vinsgr2vr_d, "V2SLLiV2SLLiLLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vpickve2gr_b, "iV16ScIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vpickve2gr_h, "iV8SsIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vpickve2gr_w, "iV4SiIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vpickve2gr_d, "LLiV2SLLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vpickve2gr_bu, "iV16UcIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vpickve2gr_hu, "iV8UsIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vpickve2gr_wu, "iV4UiIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vpickve2gr_du, "LLiV2ULLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vreplve_b, "V16cV16cUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vreplve_h, "V8sV8sUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vreplve_w, "V4iV4iUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vreplve_d, "V2LLiV2LLiUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vreplvei_b, "V16cV16cIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vreplvei_h, "V8sV8sIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vreplvei_w, "V4iV4iIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vreplvei_d, "V2LLiV2LLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vbsll_v, "V16cV16cIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vbsrl_v, "V16cV16cIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vpackev_b, "V16cV16cV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vpackev_h, "V8sV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vpackev_w, "V4iV4iV4i", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vpackev_d, "V2LLiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vpackod_b, "V16cV16cV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vpackod_h, "V8sV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vpackod_w, "V4iV4iV4i", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vpackod_d, "V2LLiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vpickev_b, "V16cV16cV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vpickev_h, "V8sV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vpickev_w, "V4iV4iV4i", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vpickev_d, "V2LLiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vpickod_b, "V16cV16cV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vpickod_h, "V8sV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vpickod_w, "V4iV4iV4i", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vpickod_d, "V2LLiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vilvl_b, "V16cV16cV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vilvl_h, "V8sV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vilvl_w, "V4iV4iV4i", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vilvl_d, "V2LLiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vilvh_b, "V16cV16cV16c", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vilvh_h, "V8sV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vilvh_w, "V4iV4iV4i", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vilvh_d, "V2LLiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vshuf_b, "V16UcV16UcV16UcV16Uc", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vshuf_h, "V8sV8sV8sV8s", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vshuf_w, "V4iV4iV4iV4i", 
"nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vshuf_d, "V2LLiV2LLiV2LLiV2LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vshuf4i_b, "V16cV16cIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vshuf4i_h, "V8sV8sIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vshuf4i_w, "V4iV4iIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vshuf4i_d, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vpermi_w, "V4iV4iV4iIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vextrins_b, "V16cV16cV16cIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vextrins_h, "V8sV8sV8sIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vextrins_w, "V4iV4iV4iIUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vextrins_d, "V2LLiV2LLiV2LLiIUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vld, "V16ScvC*Ii", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vst, "vV16Scv*Ii", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vldx, "V16ScvC*LLi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vstx, "vV16Scv*LLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vldrepl_b, "V16cvC*Ii", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vldrepl_h, "V8svC*Ii", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vldrepl_w, "V4ivC*Ii", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vldrepl_d, "V2LLivC*Ii", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_vstelm_b, "vV16Scv*IiUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vstelm_h, "vV8Ssv*IiUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vstelm_w, "vV4Siv*IiUi", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_vstelm_d, "vV2SLLiv*IiUi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_bz_v, "iV16Uc", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_bnz_v, "iV16Uc", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_bz_b, "iV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_bz_h, "iV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_bz_w, "iV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_bz_d, "iV2ULLi", "nc", "lsx") ++ ++TARGET_BUILTIN(__builtin_lsx_bnz_b, "iV16Uc", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_bnz_h, "iV8Us", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_bnz_w, "iV4Ui", "nc", "lsx") ++TARGET_BUILTIN(__builtin_lsx_bnz_d, "iV2ULLi", "nc", "lsx") +diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt +index db47de2ad965..1d5573b71e6d 100644 +--- a/clang/lib/Headers/CMakeLists.txt ++++ b/clang/lib/Headers/CMakeLists.txt +@@ -78,6 +78,7 @@ set(hlsl_files + + set(loongarch_files + larchintrin.h ++ lsxintrin.h + ) + + set(mips_msa_files +diff --git a/clang/lib/Headers/lsxintrin.h b/clang/lib/Headers/lsxintrin.h +new file mode 100644 +index 000000000000..a29bc7757ab5 +--- /dev/null ++++ b/clang/lib/Headers/lsxintrin.h +@@ -0,0 +1,3726 @@ ++/*===------------- lsxintrin.h - LoongArch LSX intrinsics ------------------=== ++ * ++ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++ * See https://llvm.org/LICENSE.txt for license information. 
++ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++ * ++ *===-----------------------------------------------------------------------=== ++ */ ++ ++#ifndef _LOONGSON_SXINTRIN_H ++#define _LOONGSON_SXINTRIN_H 1 ++ ++#if defined(__loongarch_sx) ++typedef signed char v16i8 __attribute__((vector_size(16), aligned(16))); ++typedef signed char v16i8_b __attribute__((vector_size(16), aligned(1))); ++typedef unsigned char v16u8 __attribute__((vector_size(16), aligned(16))); ++typedef unsigned char v16u8_b __attribute__((vector_size(16), aligned(1))); ++typedef short v8i16 __attribute__((vector_size(16), aligned(16))); ++typedef short v8i16_h __attribute__((vector_size(16), aligned(2))); ++typedef unsigned short v8u16 __attribute__((vector_size(16), aligned(16))); ++typedef unsigned short v8u16_h __attribute__((vector_size(16), aligned(2))); ++typedef int v4i32 __attribute__((vector_size(16), aligned(16))); ++typedef int v4i32_w __attribute__((vector_size(16), aligned(4))); ++typedef unsigned int v4u32 __attribute__((vector_size(16), aligned(16))); ++typedef unsigned int v4u32_w __attribute__((vector_size(16), aligned(4))); ++typedef long long v2i64 __attribute__((vector_size(16), aligned(16))); ++typedef long long v2i64_d __attribute__((vector_size(16), aligned(8))); ++typedef unsigned long long v2u64 __attribute__((vector_size(16), aligned(16))); ++typedef unsigned long long v2u64_d __attribute__((vector_size(16), aligned(8))); ++typedef float v4f32 __attribute__((vector_size(16), aligned(16))); ++typedef float v4f32_w __attribute__((vector_size(16), aligned(4))); ++typedef double v2f64 __attribute__((vector_size(16), aligned(16))); ++typedef double v2f64_d __attribute__((vector_size(16), aligned(8))); ++ ++typedef long long __m128i __attribute__((__vector_size__(16), __may_alias__)); ++typedef float __m128 __attribute__((__vector_size__(16), __may_alias__)); ++typedef double __m128d __attribute__((__vector_size__(16), __may_alias__)); ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsll_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsll_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsll_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsll_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsll_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsll_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsll_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsll_d((v2i64)_1, (v2i64)_2); ++} ++ ++#define __lsx_vslli_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vslli_b((v16i8)(_1), (_2))) ++ ++#define __lsx_vslli_h(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vslli_h((v8i16)(_1), (_2))) ++ ++#define __lsx_vslli_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslli_w((v4i32)(_1), (_2))) ++ ++#define __lsx_vslli_d(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vslli_d((v2i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsra_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsra_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 
__m128i ++ __lsx_vsra_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsra_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsra_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsra_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsra_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsra_d((v2i64)_1, (v2i64)_2); ++} ++ ++#define __lsx_vsrai_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrai_b((v16i8)(_1), (_2))) ++ ++#define __lsx_vsrai_h(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrai_h((v8i16)(_1), (_2))) ++ ++#define __lsx_vsrai_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrai_w((v4i32)(_1), (_2))) ++ ++#define __lsx_vsrai_d(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrai_d((v2i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrar_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrar_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrar_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrar_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrar_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrar_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrar_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrar_d((v2i64)_1, (v2i64)_2); ++} ++ ++#define __lsx_vsrari_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrari_b((v16i8)(_1), (_2))) ++ ++#define __lsx_vsrari_h(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrari_h((v8i16)(_1), (_2))) ++ ++#define __lsx_vsrari_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrari_w((v4i32)(_1), (_2))) ++ ++#define __lsx_vsrari_d(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrari_d((v2i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrl_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrl_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrl_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrl_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrl_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrl_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrl_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrl_d((v2i64)_1, (v2i64)_2); ++} ++ ++#define __lsx_vsrli_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrli_b((v16i8)(_1), (_2))) ++ ++#define __lsx_vsrli_h(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrli_h((v8i16)(_1), (_2))) ++ ++#define __lsx_vsrli_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrli_w((v4i32)(_1), (_2))) ++ ++#define __lsx_vsrli_d(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrli_d((v2i64)(_1), 
(_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrlr_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrlr_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrlr_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrlr_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrlr_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrlr_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrlr_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrlr_d((v2i64)_1, (v2i64)_2); ++} ++ ++#define __lsx_vsrlri_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrlri_b((v16i8)(_1), (_2))) ++ ++#define __lsx_vsrlri_h(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrlri_h((v8i16)(_1), (_2))) ++ ++#define __lsx_vsrlri_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrlri_w((v4i32)(_1), (_2))) ++ ++#define __lsx_vsrlri_d(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vsrlri_d((v2i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitclr_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitclr_b((v16u8)_1, (v16u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitclr_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitclr_h((v8u16)_1, (v8u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitclr_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitclr_w((v4u32)_1, (v4u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitclr_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitclr_d((v2u64)_1, (v2u64)_2); ++} ++ ++#define __lsx_vbitclri_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitclri_b((v16u8)(_1), (_2))) ++ ++#define __lsx_vbitclri_h(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitclri_h((v8u16)(_1), (_2))) ++ ++#define __lsx_vbitclri_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitclri_w((v4u32)(_1), (_2))) ++ ++#define __lsx_vbitclri_d(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitclri_d((v2u64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitset_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitset_b((v16u8)_1, (v16u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitset_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitset_h((v8u16)_1, (v8u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitset_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitset_w((v4u32)_1, (v4u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitset_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitset_d((v2u64)_1, (v2u64)_2); ++} ++ ++#define __lsx_vbitseti_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ 
((__m128i)__builtin_lsx_vbitseti_b((v16u8)(_1), (_2))) ++ ++#define __lsx_vbitseti_h(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitseti_h((v8u16)(_1), (_2))) ++ ++#define __lsx_vbitseti_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitseti_w((v4u32)(_1), (_2))) ++ ++#define __lsx_vbitseti_d(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitseti_d((v2u64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitrev_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitrev_b((v16u8)_1, (v16u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitrev_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitrev_h((v8u16)_1, (v8u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitrev_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitrev_w((v4u32)_1, (v4u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitrev_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vbitrev_d((v2u64)_1, (v2u64)_2); ++} ++ ++#define __lsx_vbitrevi_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitrevi_b((v16u8)(_1), (_2))) ++ ++#define __lsx_vbitrevi_h(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitrevi_h((v8u16)(_1), (_2))) ++ ++#define __lsx_vbitrevi_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitrevi_w((v4u32)(_1), (_2))) ++ ++#define __lsx_vbitrevi_d(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vbitrevi_d((v2u64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vadd_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vadd_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vadd_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vadd_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vadd_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vadd_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vadd_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vadd_d((v2i64)_1, (v2i64)_2); ++} ++ ++#define __lsx_vaddi_bu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vaddi_bu((v16i8)(_1), (_2))) ++ ++#define __lsx_vaddi_hu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vaddi_hu((v8i16)(_1), (_2))) ++ ++#define __lsx_vaddi_wu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vaddi_wu((v4i32)(_1), (_2))) ++ ++#define __lsx_vaddi_du(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vaddi_du((v2i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsub_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsub_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsub_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsub_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ 
__lsx_vsub_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsub_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsub_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsub_d((v2i64)_1, (v2i64)_2); ++} ++ ++#define __lsx_vsubi_bu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsubi_bu((v16i8)(_1), (_2))) ++ ++#define __lsx_vsubi_hu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsubi_hu((v8i16)(_1), (_2))) ++ ++#define __lsx_vsubi_wu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsubi_wu((v4i32)(_1), (_2))) ++ ++#define __lsx_vsubi_du(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsubi_du((v2i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmax_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmax_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmax_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmax_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmax_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmax_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmax_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmax_d((v2i64)_1, (v2i64)_2); ++} ++ ++#define __lsx_vmaxi_b(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmaxi_b((v16i8)(_1), (_2))) ++ ++#define __lsx_vmaxi_h(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmaxi_h((v8i16)(_1), (_2))) ++ ++#define __lsx_vmaxi_w(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmaxi_w((v4i32)(_1), (_2))) ++ ++#define __lsx_vmaxi_d(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmaxi_d((v2i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmax_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmax_bu((v16u8)_1, (v16u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmax_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmax_hu((v8u16)_1, (v8u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmax_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmax_wu((v4u32)_1, (v4u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmax_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmax_du((v2u64)_1, (v2u64)_2); ++} ++ ++#define __lsx_vmaxi_bu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmaxi_bu((v16u8)(_1), (_2))) ++ ++#define __lsx_vmaxi_hu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmaxi_hu((v8u16)(_1), (_2))) ++ ++#define __lsx_vmaxi_wu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmaxi_wu((v4u32)(_1), (_2))) ++ ++#define __lsx_vmaxi_du(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmaxi_du((v2u64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmin_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmin_b((v16i8)_1, (v16i8)_2); ++} 
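A quick aside on how these wrappers are meant to be consumed (illustrative only, not part of the patch): each intrinsic is a thin, always-inline cast around the corresponding __builtin_lsx_* call, so user code works purely in terms of __m128i. A minimal sketch, assuming a loongarch64 target, the -mlsx option this backport is expected to enable, and that the header is installed under its usual name <lsxintrin.h>:

#include <lsxintrin.h>

/* Illustrative sketch only: clamps every signed byte lane of v into
 * [lo, hi] using the vmax/vmin wrappers defined above. */
static __m128i clamp_bytes(__m128i v, __m128i lo, __m128i hi) {
  v = __lsx_vmax_b(v, lo); /* raise lanes that fall below the lower bound */
  v = __lsx_vmin_b(v, hi); /* cap lanes that exceed the upper bound */
  return v;
}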
++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmin_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmin_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmin_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmin_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmin_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmin_d((v2i64)_1, (v2i64)_2); ++} ++ ++#define __lsx_vmini_b(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmini_b((v16i8)(_1), (_2))) ++ ++#define __lsx_vmini_h(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmini_h((v8i16)(_1), (_2))) ++ ++#define __lsx_vmini_w(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmini_w((v4i32)(_1), (_2))) ++ ++#define __lsx_vmini_d(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmini_d((v2i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmin_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmin_bu((v16u8)_1, (v16u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmin_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmin_hu((v8u16)_1, (v8u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmin_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmin_wu((v4u32)_1, (v4u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmin_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmin_du((v2u64)_1, (v2u64)_2); ++} ++ ++#define __lsx_vmini_bu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmini_bu((v16u8)(_1), (_2))) ++ ++#define __lsx_vmini_hu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmini_hu((v8u16)(_1), (_2))) ++ ++#define __lsx_vmini_wu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmini_wu((v4u32)(_1), (_2))) ++ ++#define __lsx_vmini_du(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vmini_du((v2u64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vseq_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vseq_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vseq_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vseq_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vseq_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vseq_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vseq_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vseq_d((v2i64)_1, (v2i64)_2); ++} ++ ++#define __lsx_vseqi_b(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vseqi_b((v16i8)(_1), (_2))) ++ ++#define __lsx_vseqi_h(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vseqi_h((v8i16)(_1), (_2))) ++ ++#define __lsx_vseqi_w(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vseqi_w((v4i32)(_1), (_2))) ++ ++#define 
__lsx_vseqi_d(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vseqi_d((v2i64)(_1), (_2))) ++ ++#define __lsx_vslti_b(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslti_b((v16i8)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vslt_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vslt_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vslt_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vslt_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vslt_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vslt_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vslt_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vslt_d((v2i64)_1, (v2i64)_2); ++} ++ ++#define __lsx_vslti_h(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslti_h((v8i16)(_1), (_2))) ++ ++#define __lsx_vslti_w(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslti_w((v4i32)(_1), (_2))) ++ ++#define __lsx_vslti_d(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslti_d((v2i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vslt_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vslt_bu((v16u8)_1, (v16u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vslt_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vslt_hu((v8u16)_1, (v8u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vslt_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vslt_wu((v4u32)_1, (v4u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vslt_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vslt_du((v2u64)_1, (v2u64)_2); ++} ++ ++#define __lsx_vslti_bu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslti_bu((v16u8)(_1), (_2))) ++ ++#define __lsx_vslti_hu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslti_hu((v8u16)(_1), (_2))) ++ ++#define __lsx_vslti_wu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslti_wu((v4u32)(_1), (_2))) ++ ++#define __lsx_vslti_du(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslti_du((v2u64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsle_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsle_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsle_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsle_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsle_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsle_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsle_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsle_d((v2i64)_1, (v2i64)_2); ++} ++ ++#define __lsx_vslei_b(/*__m128i*/ _1, 
/*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslei_b((v16i8)(_1), (_2))) ++ ++#define __lsx_vslei_h(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslei_h((v8i16)(_1), (_2))) ++ ++#define __lsx_vslei_w(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslei_w((v4i32)(_1), (_2))) ++ ++#define __lsx_vslei_d(/*__m128i*/ _1, /*si5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslei_d((v2i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsle_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsle_bu((v16u8)_1, (v16u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsle_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsle_hu((v8u16)_1, (v8u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsle_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsle_wu((v4u32)_1, (v4u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsle_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsle_du((v2u64)_1, (v2u64)_2); ++} ++ ++#define __lsx_vslei_bu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslei_bu((v16u8)(_1), (_2))) ++ ++#define __lsx_vslei_hu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslei_hu((v8u16)(_1), (_2))) ++ ++#define __lsx_vslei_wu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslei_wu((v4u32)(_1), (_2))) ++ ++#define __lsx_vslei_du(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vslei_du((v2u64)(_1), (_2))) ++ ++#define __lsx_vsat_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vsat_b((v16i8)(_1), (_2))) ++ ++#define __lsx_vsat_h(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vsat_h((v8i16)(_1), (_2))) ++ ++#define __lsx_vsat_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsat_w((v4i32)(_1), (_2))) ++ ++#define __lsx_vsat_d(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vsat_d((v2i64)(_1), (_2))) ++ ++#define __lsx_vsat_bu(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vsat_bu((v16u8)(_1), (_2))) ++ ++#define __lsx_vsat_hu(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vsat_hu((v8u16)(_1), (_2))) ++ ++#define __lsx_vsat_wu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsat_wu((v4u32)(_1), (_2))) ++ ++#define __lsx_vsat_du(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vsat_du((v2u64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vadda_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vadda_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vadda_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vadda_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vadda_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vadda_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vadda_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vadda_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsadd_b(__m128i 
_1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsadd_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsadd_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsadd_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsadd_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsadd_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsadd_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsadd_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsadd_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsadd_bu((v16u8)_1, (v16u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsadd_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsadd_hu((v8u16)_1, (v8u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsadd_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsadd_wu((v4u32)_1, (v4u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsadd_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsadd_du((v2u64)_1, (v2u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavg_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavg_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavg_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavg_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavg_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavg_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavg_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavg_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavg_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavg_bu((v16u8)_1, (v16u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavg_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavg_hu((v8u16)_1, (v8u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavg_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavg_wu((v4u32)_1, (v4u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavg_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavg_du((v2u64)_1, (v2u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavgr_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavgr_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ 
__lsx_vavgr_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavgr_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavgr_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavgr_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavgr_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavgr_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavgr_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavgr_bu((v16u8)_1, (v16u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavgr_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavgr_hu((v8u16)_1, (v8u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavgr_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavgr_wu((v4u32)_1, (v4u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vavgr_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vavgr_du((v2u64)_1, (v2u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssub_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssub_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssub_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssub_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssub_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssub_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssub_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssub_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssub_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssub_bu((v16u8)_1, (v16u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssub_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssub_hu((v8u16)_1, (v8u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssub_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssub_wu((v4u32)_1, (v4u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssub_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssub_du((v2u64)_1, (v2u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vabsd_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vabsd_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vabsd_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vabsd_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, 
__always_inline__, __artificial__)) __m128i ++ __lsx_vabsd_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vabsd_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vabsd_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vabsd_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vabsd_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vabsd_bu((v16u8)_1, (v16u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vabsd_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vabsd_hu((v8u16)_1, (v8u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vabsd_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vabsd_wu((v4u32)_1, (v4u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vabsd_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vabsd_du((v2u64)_1, (v2u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmul_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmul_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmul_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmul_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmul_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmul_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmul_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmul_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmadd_b(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmadd_b((v16i8)_1, (v16i8)_2, (v16i8)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmadd_h(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmadd_h((v8i16)_1, (v8i16)_2, (v8i16)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmadd_w(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmadd_w((v4i32)_1, (v4i32)_2, (v4i32)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmadd_d(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmadd_d((v2i64)_1, (v2i64)_2, (v2i64)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmsub_b(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmsub_b((v16i8)_1, (v16i8)_2, (v16i8)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmsub_h(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmsub_h((v8i16)_1, (v8i16)_2, (v8i16)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ 
__lsx_vmsub_w(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmsub_w((v4i32)_1, (v4i32)_2, (v4i32)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmsub_d(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmsub_d((v2i64)_1, (v2i64)_2, (v2i64)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vdiv_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vdiv_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vdiv_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vdiv_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vdiv_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vdiv_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vdiv_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vdiv_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vdiv_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vdiv_bu((v16u8)_1, (v16u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vdiv_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vdiv_hu((v8u16)_1, (v8u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vdiv_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vdiv_wu((v4u32)_1, (v4u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vdiv_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vdiv_du((v2u64)_1, (v2u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhaddw_h_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhaddw_h_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhaddw_w_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhaddw_w_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhaddw_d_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhaddw_d_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhaddw_hu_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhaddw_hu_bu((v16u8)_1, (v16u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhaddw_wu_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhaddw_wu_hu((v8u16)_1, (v8u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhaddw_du_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhaddw_du_wu((v4u32)_1, (v4u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhsubw_h_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhsubw_h_b((v16i8)_1, 
(v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhsubw_w_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhsubw_w_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhsubw_d_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhsubw_d_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhsubw_hu_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhsubw_hu_bu((v16u8)_1, (v16u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhsubw_wu_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhsubw_wu_hu((v8u16)_1, (v8u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhsubw_du_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhsubw_du_wu((v4u32)_1, (v4u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmod_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmod_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmod_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmod_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmod_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmod_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmod_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmod_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmod_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmod_bu((v16u8)_1, (v16u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmod_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmod_hu((v8u16)_1, (v8u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmod_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmod_wu((v4u32)_1, (v4u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmod_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmod_du((v2u64)_1, (v2u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vreplve_b(__m128i _1, int _2) { ++ return (__m128i)__builtin_lsx_vreplve_b((v16i8)_1, (int)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vreplve_h(__m128i _1, int _2) { ++ return (__m128i)__builtin_lsx_vreplve_h((v8i16)_1, (int)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vreplve_w(__m128i _1, int _2) { ++ return (__m128i)__builtin_lsx_vreplve_w((v4i32)_1, (int)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vreplve_d(__m128i _1, int _2) { ++ return 
(__m128i)__builtin_lsx_vreplve_d((v2i64)_1, (int)_2); ++} ++ ++#define __lsx_vreplvei_b(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vreplvei_b((v16i8)(_1), (_2))) ++ ++#define __lsx_vreplvei_h(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vreplvei_h((v8i16)(_1), (_2))) ++ ++#define __lsx_vreplvei_w(/*__m128i*/ _1, /*ui2*/ _2) \ ++ ((__m128i)__builtin_lsx_vreplvei_w((v4i32)(_1), (_2))) ++ ++#define __lsx_vreplvei_d(/*__m128i*/ _1, /*ui1*/ _2) \ ++ ((__m128i)__builtin_lsx_vreplvei_d((v2i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpickev_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpickev_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpickev_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpickev_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpickev_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpickev_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpickev_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpickev_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpickod_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpickod_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpickod_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpickod_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpickod_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpickod_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpickod_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpickod_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vilvh_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vilvh_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vilvh_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vilvh_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vilvh_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vilvh_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vilvh_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vilvh_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vilvl_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vilvl_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vilvl_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vilvl_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vilvl_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vilvl_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vilvl_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vilvl_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpackev_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpackev_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpackev_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpackev_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpackev_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpackev_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpackev_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpackev_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpackod_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpackod_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpackod_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpackod_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpackod_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpackod_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpackod_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vpackod_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vshuf_h(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vshuf_h((v8i16)_1, (v8i16)_2, (v8i16)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vshuf_w(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vshuf_w((v4i32)_1, (v4i32)_2, (v4i32)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vshuf_d(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vshuf_d((v2i64)_1, (v2i64)_2, (v2i64)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vand_v(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vand_v((v16u8)_1, (v16u8)_2); ++} ++ ++#define __lsx_vandi_b(/*__m128i*/ _1, /*ui8*/ _2) \ ++ ((__m128i)__builtin_lsx_vandi_b((v16u8)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vor_v(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vor_v((v16u8)_1, (v16u8)_2); ++} ++ ++#define __lsx_vori_b(/*__m128i*/ _1, /*ui8*/ _2) \ ++ ((__m128i)__builtin_lsx_vori_b((v16u8)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ 
__lsx_vnor_v(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vnor_v((v16u8)_1, (v16u8)_2); ++} ++ ++#define __lsx_vnori_b(/*__m128i*/ _1, /*ui8*/ _2) \ ++ ((__m128i)__builtin_lsx_vnori_b((v16u8)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vxor_v(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vxor_v((v16u8)_1, (v16u8)_2); ++} ++ ++#define __lsx_vxori_b(/*__m128i*/ _1, /*ui8*/ _2) \ ++ ((__m128i)__builtin_lsx_vxori_b((v16u8)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vbitsel_v(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vbitsel_v((v16u8)_1, (v16u8)_2, (v16u8)_3); ++} ++ ++#define __lsx_vbitseli_b(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ ++ ((__m128i)__builtin_lsx_vbitseli_b((v16u8)(_1), (v16u8)(_2), (_3))) ++ ++#define __lsx_vshuf4i_b(/*__m128i*/ _1, /*ui8*/ _2) \ ++ ((__m128i)__builtin_lsx_vshuf4i_b((v16i8)(_1), (_2))) ++ ++#define __lsx_vshuf4i_h(/*__m128i*/ _1, /*ui8*/ _2) \ ++ ((__m128i)__builtin_lsx_vshuf4i_h((v8i16)(_1), (_2))) ++ ++#define __lsx_vshuf4i_w(/*__m128i*/ _1, /*ui8*/ _2) \ ++ ((__m128i)__builtin_lsx_vshuf4i_w((v4i32)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vreplgr2vr_b(int _1) { ++ return (__m128i)__builtin_lsx_vreplgr2vr_b((int)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vreplgr2vr_h(int _1) { ++ return (__m128i)__builtin_lsx_vreplgr2vr_h((int)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vreplgr2vr_w(int _1) { ++ return (__m128i)__builtin_lsx_vreplgr2vr_w((int)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vreplgr2vr_d(long int _1) { ++ return (__m128i)__builtin_lsx_vreplgr2vr_d((long int)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpcnt_b(__m128i _1) { ++ return (__m128i)__builtin_lsx_vpcnt_b((v16i8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpcnt_h(__m128i _1) { ++ return (__m128i)__builtin_lsx_vpcnt_h((v8i16)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpcnt_w(__m128i _1) { ++ return (__m128i)__builtin_lsx_vpcnt_w((v4i32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vpcnt_d(__m128i _1) { ++ return (__m128i)__builtin_lsx_vpcnt_d((v2i64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vclo_b(__m128i _1) { ++ return (__m128i)__builtin_lsx_vclo_b((v16i8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vclo_h(__m128i _1) { ++ return (__m128i)__builtin_lsx_vclo_h((v8i16)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vclo_w(__m128i _1) { ++ return (__m128i)__builtin_lsx_vclo_w((v4i32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vclo_d(__m128i _1) { ++ return (__m128i)__builtin_lsx_vclo_d((v2i64)_1); ++} ++ ++extern __inline 
++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vclz_b(__m128i _1) { ++ return (__m128i)__builtin_lsx_vclz_b((v16i8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vclz_h(__m128i _1) { ++ return (__m128i)__builtin_lsx_vclz_h((v8i16)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vclz_w(__m128i _1) { ++ return (__m128i)__builtin_lsx_vclz_w((v4i32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vclz_d(__m128i _1) { ++ return (__m128i)__builtin_lsx_vclz_d((v2i64)_1); ++} ++ ++#define __lsx_vpickve2gr_b(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((int)__builtin_lsx_vpickve2gr_b((v16i8)(_1), (_2))) ++ ++#define __lsx_vpickve2gr_h(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((int)__builtin_lsx_vpickve2gr_h((v8i16)(_1), (_2))) ++ ++#define __lsx_vpickve2gr_w(/*__m128i*/ _1, /*ui2*/ _2) \ ++ ((int)__builtin_lsx_vpickve2gr_w((v4i32)(_1), (_2))) ++ ++#define __lsx_vpickve2gr_d(/*__m128i*/ _1, /*ui1*/ _2) \ ++ ((long int)__builtin_lsx_vpickve2gr_d((v2i64)(_1), (_2))) ++ ++#define __lsx_vpickve2gr_bu(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((unsigned int)__builtin_lsx_vpickve2gr_bu((v16i8)(_1), (_2))) ++ ++#define __lsx_vpickve2gr_hu(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((unsigned int)__builtin_lsx_vpickve2gr_hu((v8i16)(_1), (_2))) ++ ++#define __lsx_vpickve2gr_wu(/*__m128i*/ _1, /*ui2*/ _2) \ ++ ((unsigned int)__builtin_lsx_vpickve2gr_wu((v4i32)(_1), (_2))) ++ ++#define __lsx_vpickve2gr_du(/*__m128i*/ _1, /*ui1*/ _2) \ ++ ((unsigned long int)__builtin_lsx_vpickve2gr_du((v2i64)(_1), (_2))) ++ ++#define __lsx_vinsgr2vr_b(/*__m128i*/ _1, /*int*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vinsgr2vr_b((v16i8)(_1), (int)(_2), (_3))) ++ ++#define __lsx_vinsgr2vr_h(/*__m128i*/ _1, /*int*/ _2, /*ui3*/ _3) \ ++ ((__m128i)__builtin_lsx_vinsgr2vr_h((v8i16)(_1), (int)(_2), (_3))) ++ ++#define __lsx_vinsgr2vr_w(/*__m128i*/ _1, /*int*/ _2, /*ui2*/ _3) \ ++ ((__m128i)__builtin_lsx_vinsgr2vr_w((v4i32)(_1), (int)(_2), (_3))) ++ ++#define __lsx_vinsgr2vr_d(/*__m128i*/ _1, /*long int*/ _2, /*ui1*/ _3) \ ++ ((__m128i)__builtin_lsx_vinsgr2vr_d((v2i64)(_1), (long int)(_2), (_3))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfadd_s(__m128 _1, __m128 _2) { ++ return (__m128)__builtin_lsx_vfadd_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfadd_d(__m128d _1, __m128d _2) { ++ return (__m128d)__builtin_lsx_vfadd_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfsub_s(__m128 _1, __m128 _2) { ++ return (__m128)__builtin_lsx_vfsub_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfsub_d(__m128d _1, __m128d _2) { ++ return (__m128d)__builtin_lsx_vfsub_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfmul_s(__m128 _1, __m128 _2) { ++ return (__m128)__builtin_lsx_vfmul_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfmul_d(__m128d _1, __m128d _2) { ++ return (__m128d)__builtin_lsx_vfmul_d((v2f64)_1, (v2f64)_2); ++} ++ 
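Another illustrative sketch (again not part of the patch; same loongarch64 / -mlsx / <lsxintrin.h> assumptions as above): the vinsgr2vr/vpickve2gr macros defined above move scalars in and out of vector lanes, which is enough to round-trip a small computation end to end:

#include <lsxintrin.h>

/* Builds an int32x4 vector lane by lane, adds a splatted bias to every
 * element, and folds the lanes back into a scalar sum. */
static int sum_plus_bias(int a0, int a1, int a2, int a3, int bias) {
  __m128i v = __lsx_vreplgr2vr_w(0);             /* all-zero starting vector   */
  v = __lsx_vinsgr2vr_w(v, a0, 0);               /* lane indices must be       */
  v = __lsx_vinsgr2vr_w(v, a1, 1);               /* compile-time constants     */
  v = __lsx_vinsgr2vr_w(v, a2, 2);
  v = __lsx_vinsgr2vr_w(v, a3, 3);
  v = __lsx_vadd_w(v, __lsx_vreplgr2vr_w(bias)); /* element-wise 32-bit add    */
  return __lsx_vpickve2gr_w(v, 0) + __lsx_vpickve2gr_w(v, 1) +
         __lsx_vpickve2gr_w(v, 2) + __lsx_vpickve2gr_w(v, 3);
}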
++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfdiv_s(__m128 _1, __m128 _2) { ++ return (__m128)__builtin_lsx_vfdiv_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfdiv_d(__m128d _1, __m128d _2) { ++ return (__m128d)__builtin_lsx_vfdiv_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcvt_h_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcvt_h_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfcvt_s_d(__m128d _1, __m128d _2) { ++ return (__m128)__builtin_lsx_vfcvt_s_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfmin_s(__m128 _1, __m128 _2) { ++ return (__m128)__builtin_lsx_vfmin_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfmin_d(__m128d _1, __m128d _2) { ++ return (__m128d)__builtin_lsx_vfmin_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfmina_s(__m128 _1, __m128 _2) { ++ return (__m128)__builtin_lsx_vfmina_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfmina_d(__m128d _1, __m128d _2) { ++ return (__m128d)__builtin_lsx_vfmina_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfmax_s(__m128 _1, __m128 _2) { ++ return (__m128)__builtin_lsx_vfmax_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfmax_d(__m128d _1, __m128d _2) { ++ return (__m128d)__builtin_lsx_vfmax_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfmaxa_s(__m128 _1, __m128 _2) { ++ return (__m128)__builtin_lsx_vfmaxa_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfmaxa_d(__m128d _1, __m128d _2) { ++ return (__m128d)__builtin_lsx_vfmaxa_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfclass_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vfclass_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfclass_d(__m128d _1) { ++ return (__m128i)__builtin_lsx_vfclass_d((v2f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfsqrt_s(__m128 _1) { ++ return (__m128)__builtin_lsx_vfsqrt_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfsqrt_d(__m128d _1) { ++ return (__m128d)__builtin_lsx_vfsqrt_d((v2f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfrecip_s(__m128 _1) { ++ return (__m128)__builtin_lsx_vfrecip_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ 
__lsx_vfrecip_d(__m128d _1) { ++ return (__m128d)__builtin_lsx_vfrecip_d((v2f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfrint_s(__m128 _1) { ++ return (__m128)__builtin_lsx_vfrint_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfrint_d(__m128d _1) { ++ return (__m128d)__builtin_lsx_vfrint_d((v2f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfrsqrt_s(__m128 _1) { ++ return (__m128)__builtin_lsx_vfrsqrt_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfrsqrt_d(__m128d _1) { ++ return (__m128d)__builtin_lsx_vfrsqrt_d((v2f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vflogb_s(__m128 _1) { ++ return (__m128)__builtin_lsx_vflogb_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vflogb_d(__m128d _1) { ++ return (__m128d)__builtin_lsx_vflogb_d((v2f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfcvth_s_h(__m128i _1) { ++ return (__m128)__builtin_lsx_vfcvth_s_h((v8i16)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfcvth_d_s(__m128 _1) { ++ return (__m128d)__builtin_lsx_vfcvth_d_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfcvtl_s_h(__m128i _1) { ++ return (__m128)__builtin_lsx_vfcvtl_s_h((v8i16)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfcvtl_d_s(__m128 _1) { ++ return (__m128d)__builtin_lsx_vfcvtl_d_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftint_w_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftint_w_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftint_l_d(__m128d _1) { ++ return (__m128i)__builtin_lsx_vftint_l_d((v2f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftint_wu_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftint_wu_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftint_lu_d(__m128d _1) { ++ return (__m128i)__builtin_lsx_vftint_lu_d((v2f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrz_w_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrz_w_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrz_l_d(__m128d _1) { ++ return (__m128i)__builtin_lsx_vftintrz_l_d((v2f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrz_wu_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrz_wu_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrz_lu_d(__m128d _1) { ++ return (__m128i)__builtin_lsx_vftintrz_lu_d((v2f64)_1); ++} ++ 
++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vffint_s_w(__m128i _1) { ++ return (__m128)__builtin_lsx_vffint_s_w((v4i32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vffint_d_l(__m128i _1) { ++ return (__m128d)__builtin_lsx_vffint_d_l((v2i64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vffint_s_wu(__m128i _1) { ++ return (__m128)__builtin_lsx_vffint_s_wu((v4u32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vffint_d_lu(__m128i _1) { ++ return (__m128d)__builtin_lsx_vffint_d_lu((v2u64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vandn_v(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vandn_v((v16u8)_1, (v16u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vneg_b(__m128i _1) { ++ return (__m128i)__builtin_lsx_vneg_b((v16i8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vneg_h(__m128i _1) { ++ return (__m128i)__builtin_lsx_vneg_h((v8i16)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vneg_w(__m128i _1) { ++ return (__m128i)__builtin_lsx_vneg_w((v4i32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vneg_d(__m128i _1) { ++ return (__m128i)__builtin_lsx_vneg_d((v2i64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmuh_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmuh_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmuh_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmuh_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmuh_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmuh_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmuh_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmuh_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmuh_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmuh_bu((v16u8)_1, (v16u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmuh_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmuh_hu((v8u16)_1, (v8u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmuh_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmuh_wu((v4u32)_1, (v4u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmuh_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmuh_du((v2u64)_1, (v2u64)_2); ++} ++ ++#define __lsx_vsllwil_h_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vsllwil_h_b((v16i8)(_1), (_2))) ++ ++#define __lsx_vsllwil_w_h(/*__m128i*/ 
_1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vsllwil_w_h((v8i16)(_1), (_2))) ++ ++#define __lsx_vsllwil_d_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsllwil_d_w((v4i32)(_1), (_2))) ++ ++#define __lsx_vsllwil_hu_bu(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vsllwil_hu_bu((v16u8)(_1), (_2))) ++ ++#define __lsx_vsllwil_wu_hu(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vsllwil_wu_hu((v8u16)(_1), (_2))) ++ ++#define __lsx_vsllwil_du_wu(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vsllwil_du_wu((v4u32)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsran_b_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsran_b_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsran_h_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsran_h_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsran_w_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsran_w_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssran_b_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssran_b_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssran_h_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssran_h_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssran_w_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssran_w_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssran_bu_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssran_bu_h((v8u16)_1, (v8u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssran_hu_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssran_hu_w((v4u32)_1, (v4u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssran_wu_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssran_wu_d((v2u64)_1, (v2u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrarn_b_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrarn_b_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrarn_h_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrarn_h_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrarn_w_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrarn_w_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrarn_b_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrarn_b_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrarn_h_w(__m128i _1, __m128i _2) { ++ return 
(__m128i)__builtin_lsx_vssrarn_h_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrarn_w_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrarn_w_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrarn_bu_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrarn_bu_h((v8u16)_1, (v8u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrarn_hu_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrarn_hu_w((v4u32)_1, (v4u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrarn_wu_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrarn_wu_d((v2u64)_1, (v2u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrln_b_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrln_b_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrln_h_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrln_h_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrln_w_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrln_w_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrln_bu_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrln_bu_h((v8u16)_1, (v8u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrln_hu_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrln_hu_w((v4u32)_1, (v4u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrln_wu_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrln_wu_d((v2u64)_1, (v2u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrlrn_b_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrlrn_b_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrlrn_h_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrlrn_h_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsrlrn_w_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsrlrn_w_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrlrn_bu_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrlrn_bu_h((v8u16)_1, (v8u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrlrn_hu_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrlrn_hu_w((v4u32)_1, (v4u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrlrn_wu_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrlrn_wu_d((v2u64)_1, (v2u64)_2); ++} ++ 
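The wrappers above expose the LSX shift-right-and-narrow family (vsran/vssran/vsrarn/vssrarn/vsrln/vssrln/vsrlrn/vssrlrn) as plain C functions that take and return __m128i, with the cast to the element-typed vector done inside the wrapper. A minimal usage sketch follows; it is not part of the patch and assumes a loongarch64 target compiled with -mlsx, that the header is installed as <lsxintrin.h> (the file name is not visible in this hunk), and it uses the __lsx_vld/__lsx_vst/__lsx_vrepli_h helpers defined later in the same header. It only illustrates the calling convention, not the exact lane semantics:

#include <lsxintrin.h>
#include <stdint.h>
#include <stdio.h>

int main(void) {
    int16_t src[8] = {1000, -1000, 32767, -32768, 5, -5, 0, 129};
    int8_t dst[16] = {0};

    __m128i v   = __lsx_vld(src, 0);   /* eight signed 16-bit lanes        */
    __m128i amt = __lsx_vrepli_h(2);   /* per-lane shift amount of 2       */
    /* Arithmetic shift right, then narrow to 8-bit lanes with saturation;
       the narrowed bytes are expected in the low half of the result.      */
    __m128i r = __lsx_vssran_b_h(v, amt);
    __lsx_vst(r, dst, 0);

    for (int i = 0; i < 16; ++i)
        printf("%d ", dst[i]);
    printf("\n");
    return 0;
}

The same pattern (load with __lsx_vld, operate, store with __lsx_vst) applies to the other narrowing wrappers in this group.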
++#define __lsx_vfrstpi_b(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vfrstpi_b((v16i8)(_1), (v16i8)(_2), (_3))) ++ ++#define __lsx_vfrstpi_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vfrstpi_h((v8i16)(_1), (v8i16)(_2), (_3))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfrstp_b(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vfrstp_b((v16i8)_1, (v16i8)_2, (v16i8)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfrstp_h(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vfrstp_h((v8i16)_1, (v8i16)_2, (v8i16)_3); ++} ++ ++#define __lsx_vshuf4i_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ ++ ((__m128i)__builtin_lsx_vshuf4i_d((v2i64)(_1), (v2i64)(_2), (_3))) ++ ++#define __lsx_vbsrl_v(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vbsrl_v((v16i8)(_1), (_2))) ++ ++#define __lsx_vbsll_v(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vbsll_v((v16i8)(_1), (_2))) ++ ++#define __lsx_vextrins_b(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ ++ ((__m128i)__builtin_lsx_vextrins_b((v16i8)(_1), (v16i8)(_2), (_3))) ++ ++#define __lsx_vextrins_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ ++ ((__m128i)__builtin_lsx_vextrins_h((v8i16)(_1), (v8i16)(_2), (_3))) ++ ++#define __lsx_vextrins_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ ++ ((__m128i)__builtin_lsx_vextrins_w((v4i32)(_1), (v4i32)(_2), (_3))) ++ ++#define __lsx_vextrins_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ ++ ((__m128i)__builtin_lsx_vextrins_d((v2i64)(_1), (v2i64)(_2), (_3))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmskltz_b(__m128i _1) { ++ return (__m128i)__builtin_lsx_vmskltz_b((v16i8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmskltz_h(__m128i _1) { ++ return (__m128i)__builtin_lsx_vmskltz_h((v8i16)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmskltz_w(__m128i _1) { ++ return (__m128i)__builtin_lsx_vmskltz_w((v4i32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmskltz_d(__m128i _1) { ++ return (__m128i)__builtin_lsx_vmskltz_d((v2i64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsigncov_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsigncov_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsigncov_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsigncov_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsigncov_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsigncov_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsigncov_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsigncov_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfmadd_s(__m128 _1, __m128 _2, __m128 _3) { ++ return (__m128)__builtin_lsx_vfmadd_s((v4f32)_1, 
(v4f32)_2, (v4f32)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfmadd_d(__m128d _1, __m128d _2, __m128d _3) { ++ return (__m128d)__builtin_lsx_vfmadd_d((v2f64)_1, (v2f64)_2, (v2f64)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfmsub_s(__m128 _1, __m128 _2, __m128 _3) { ++ return (__m128)__builtin_lsx_vfmsub_s((v4f32)_1, (v4f32)_2, (v4f32)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfmsub_d(__m128d _1, __m128d _2, __m128d _3) { ++ return (__m128d)__builtin_lsx_vfmsub_d((v2f64)_1, (v2f64)_2, (v2f64)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfnmadd_s(__m128 _1, __m128 _2, __m128 _3) { ++ return (__m128)__builtin_lsx_vfnmadd_s((v4f32)_1, (v4f32)_2, (v4f32)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfnmadd_d(__m128d _1, __m128d _2, __m128d _3) { ++ return (__m128d)__builtin_lsx_vfnmadd_d((v2f64)_1, (v2f64)_2, (v2f64)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfnmsub_s(__m128 _1, __m128 _2, __m128 _3) { ++ return (__m128)__builtin_lsx_vfnmsub_s((v4f32)_1, (v4f32)_2, (v4f32)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfnmsub_d(__m128d _1, __m128d _2, __m128d _3) { ++ return (__m128d)__builtin_lsx_vfnmsub_d((v2f64)_1, (v2f64)_2, (v2f64)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrne_w_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrne_w_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrne_l_d(__m128d _1) { ++ return (__m128i)__builtin_lsx_vftintrne_l_d((v2f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrp_w_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrp_w_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrp_l_d(__m128d _1) { ++ return (__m128i)__builtin_lsx_vftintrp_l_d((v2f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrm_w_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrm_w_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrm_l_d(__m128d _1) { ++ return (__m128i)__builtin_lsx_vftintrm_l_d((v2f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftint_w_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vftint_w_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vffint_s_l(__m128i _1, __m128i _2) { ++ return (__m128)__builtin_lsx_vffint_s_l((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrz_w_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vftintrz_w_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, 
__always_inline__, __artificial__)) __m128i ++ __lsx_vftintrp_w_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vftintrp_w_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrm_w_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vftintrm_w_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrne_w_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vftintrne_w_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintl_l_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintl_l_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftinth_l_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftinth_l_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vffinth_d_w(__m128i _1) { ++ return (__m128d)__builtin_lsx_vffinth_d_w((v4i32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vffintl_d_w(__m128i _1) { ++ return (__m128d)__builtin_lsx_vffintl_d_w((v4i32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrzl_l_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrzl_l_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrzh_l_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrzh_l_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrpl_l_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrpl_l_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrph_l_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrph_l_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrml_l_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrml_l_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrmh_l_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrmh_l_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrnel_l_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrnel_l_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vftintrneh_l_s(__m128 _1) { ++ return (__m128i)__builtin_lsx_vftintrneh_l_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfrintrne_s(__m128 _1) { ++ return (__m128)__builtin_lsx_vfrintrne_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfrintrne_d(__m128d _1) { ++ return (__m128d)__builtin_lsx_vfrintrne_d((v2f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfrintrz_s(__m128 _1) { ++ return (__m128)__builtin_lsx_vfrintrz_s((v4f32)_1); ++} ++ 
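The floating-point fused multiply-add, float-to-integer conversion, and rounding wrappers in this stretch follow the same thin-cast pattern, but operate on the 4 x float (__m128) and 2 x double (__m128d) vector types. A small sketch of __lsx_vfmadd_s, again hypothetical rather than from the patch, assuming the same loongarch64/-mlsx setup and that __m128 is a GCC-style vector type supporting brace initialization and [] indexing:

#include <lsxintrin.h>
#include <stdio.h>

int main(void) {
    __m128 a = (__m128){1.0f, 2.0f, 3.0f, 4.0f};
    __m128 b = (__m128){10.0f, 10.0f, 10.0f, 10.0f};
    __m128 c = (__m128){0.5f, 0.5f, 0.5f, 0.5f};

    /* vfmadd.s: per-lane a * b + c, following the usual fmadd convention. */
    __m128 r = __lsx_vfmadd_s(a, b, c);

    for (int i = 0; i < 4; ++i)
        printf("%.1f ", (double)r[i]);   /* expected: 10.5 20.5 30.5 40.5 */
    printf("\n");
    return 0;
}

The double-precision variants (__lsx_vfmadd_d and friends) work the same way on two 64-bit lanes.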
++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfrintrz_d(__m128d _1) { ++ return (__m128d)__builtin_lsx_vfrintrz_d((v2f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfrintrp_s(__m128 _1) { ++ return (__m128)__builtin_lsx_vfrintrp_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfrintrp_d(__m128d _1) { ++ return (__m128d)__builtin_lsx_vfrintrp_d((v2f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128 ++ __lsx_vfrintrm_s(__m128 _1) { ++ return (__m128)__builtin_lsx_vfrintrm_s((v4f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128d ++ __lsx_vfrintrm_d(__m128d _1) { ++ return (__m128d)__builtin_lsx_vfrintrm_d((v2f64)_1); ++} ++ ++#define __lsx_vstelm_b(/*__m128i*/ _1, /*void **/ _2, /*si8*/ _3, /*idx*/ _4) \ ++ ((void)__builtin_lsx_vstelm_b((v16i8)(_1), (void *)(_2), (_3), (_4))) ++ ++#define __lsx_vstelm_h(/*__m128i*/ _1, /*void **/ _2, /*si8*/ _3, /*idx*/ _4) \ ++ ((void)__builtin_lsx_vstelm_h((v8i16)(_1), (void *)(_2), (_3), (_4))) ++ ++#define __lsx_vstelm_w(/*__m128i*/ _1, /*void **/ _2, /*si8*/ _3, /*idx*/ _4) \ ++ ((void)__builtin_lsx_vstelm_w((v4i32)(_1), (void *)(_2), (_3), (_4))) ++ ++#define __lsx_vstelm_d(/*__m128i*/ _1, /*void **/ _2, /*si8*/ _3, /*idx*/ _4) \ ++ ((void)__builtin_lsx_vstelm_d((v2i64)(_1), (void *)(_2), (_3), (_4))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_d_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_d_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_w_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_w_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_h_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_h_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_d_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_d_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_w_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_w_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_h_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_h_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_d_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_d_wu((v4u32)_1, (v4u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_w_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_w_hu((v8u16)_1, (v8u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_h_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_h_bu((v16u8)_1, (v16u8)_2); 
++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_d_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_d_wu((v4u32)_1, (v4u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_w_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_w_hu((v8u16)_1, (v8u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_h_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_h_bu((v16u8)_1, (v16u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_d_wu_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_d_wu_w((v4u32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_w_hu_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_w_hu_h((v8u16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_h_bu_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_h_bu_b((v16u8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_d_wu_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_d_wu_w((v4u32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_w_hu_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_w_hu_h((v8u16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_h_bu_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_h_bu_b((v16u8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwev_d_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwev_d_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwev_w_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwev_w_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwev_h_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwev_h_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwod_d_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwod_d_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwod_w_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwod_w_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwod_h_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwod_h_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwev_d_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwev_d_wu((v4u32)_1, (v4u32)_2); ++} ++ ++extern 
__inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwev_w_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwev_w_hu((v8u16)_1, (v8u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwev_h_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwev_h_bu((v16u8)_1, (v16u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwod_d_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwod_d_wu((v4u32)_1, (v4u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwod_w_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwod_w_hu((v8u16)_1, (v8u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwod_h_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwod_h_bu((v16u8)_1, (v16u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_q_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_q_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_q_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_q_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_q_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_q_du((v2u64)_1, (v2u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_q_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_q_du((v2u64)_1, (v2u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwev_q_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwev_q_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwod_q_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwod_q_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwev_q_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwev_q_du((v2u64)_1, (v2u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsubwod_q_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsubwod_q_du((v2u64)_1, (v2u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwev_q_du_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwev_q_du_d((v2u64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vaddwod_q_du_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vaddwod_q_du_d((v2u64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_d_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_d_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_w_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_w_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_h_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_h_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_d_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_d_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_w_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_w_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_h_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_h_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_d_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_d_wu((v4u32)_1, (v4u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_w_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_w_hu((v8u16)_1, (v8u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_h_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_h_bu((v16u8)_1, (v16u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_d_wu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_d_wu((v4u32)_1, (v4u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_w_hu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_w_hu((v8u16)_1, (v8u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_h_bu(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_h_bu((v16u8)_1, (v16u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_d_wu_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_d_wu_w((v4u32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_w_hu_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_w_hu_h((v8u16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_h_bu_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_h_bu_b((v16u8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_d_wu_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_d_wu_w((v4u32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_w_hu_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_w_hu_h((v8u16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_h_bu_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_h_bu_b((v16u8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_q_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_q_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_q_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_q_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_q_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_q_du((v2u64)_1, (v2u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_q_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_q_du((v2u64)_1, (v2u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwev_q_du_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwev_q_du_d((v2u64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmulwod_q_du_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vmulwod_q_du_d((v2u64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhaddw_q_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhaddw_q_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhaddw_qu_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhaddw_qu_du((v2u64)_1, (v2u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhsubw_q_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhsubw_q_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vhsubw_qu_du(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vhsubw_qu_du((v2u64)_1, (v2u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_d_w(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_d_w((v2i64)_1, (v4i32)_2, (v4i32)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_w_h(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_w_h((v4i32)_1, (v8i16)_2, (v8i16)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_h_b(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_h_b((v8i16)_1, (v16i8)_2, (v16i8)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_d_wu(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_d_wu((v2u64)_1, (v4u32)_2, (v4u32)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_w_hu(__m128i _1, __m128i _2, __m128i _3) { ++ return 
(__m128i)__builtin_lsx_vmaddwev_w_hu((v4u32)_1, (v8u16)_2, (v8u16)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_h_bu(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_h_bu((v8u16)_1, (v16u8)_2, (v16u8)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_d_w(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_d_w((v2i64)_1, (v4i32)_2, (v4i32)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_w_h(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_w_h((v4i32)_1, (v8i16)_2, (v8i16)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_h_b(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_h_b((v8i16)_1, (v16i8)_2, (v16i8)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_d_wu(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_d_wu((v2u64)_1, (v4u32)_2, (v4u32)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_w_hu(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_w_hu((v4u32)_1, (v8u16)_2, (v8u16)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_h_bu(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_h_bu((v8u16)_1, (v16u8)_2, (v16u8)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_d_wu_w(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_d_wu_w((v2i64)_1, (v4u32)_2, ++ (v4i32)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_w_hu_h(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_w_hu_h((v4i32)_1, (v8u16)_2, ++ (v8i16)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_h_bu_b(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_h_bu_b((v8i16)_1, (v16u8)_2, ++ (v16i8)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_d_wu_w(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_d_wu_w((v2i64)_1, (v4u32)_2, ++ (v4i32)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_w_hu_h(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_w_hu_h((v4i32)_1, (v8u16)_2, ++ (v8i16)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_h_bu_b(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_h_bu_b((v8i16)_1, (v16u8)_2, ++ (v16i8)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_q_d(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_q_d((v2i64)_1, (v2i64)_2, (v2i64)_3); ++} ++ ++extern 
__inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_q_d(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_q_d((v2i64)_1, (v2i64)_2, (v2i64)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_q_du(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_q_du((v2u64)_1, (v2u64)_2, (v2u64)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_q_du(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_q_du((v2u64)_1, (v2u64)_2, (v2u64)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwev_q_du_d(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwev_q_du_d((v2i64)_1, (v2u64)_2, ++ (v2i64)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmaddwod_q_du_d(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vmaddwod_q_du_d((v2i64)_1, (v2u64)_2, ++ (v2i64)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vrotr_b(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vrotr_b((v16i8)_1, (v16i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vrotr_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vrotr_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vrotr_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vrotr_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vrotr_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vrotr_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vadd_q(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vadd_q((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vsub_q(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vsub_q((v2i64)_1, (v2i64)_2); ++} ++ ++#define __lsx_vldrepl_b(/*void **/ _1, /*si12*/ _2) \ ++ ((__m128i)__builtin_lsx_vldrepl_b((void const *)(_1), (_2))) ++ ++#define __lsx_vldrepl_h(/*void **/ _1, /*si11*/ _2) \ ++ ((__m128i)__builtin_lsx_vldrepl_h((void const *)(_1), (_2))) ++ ++#define __lsx_vldrepl_w(/*void **/ _1, /*si10*/ _2) \ ++ ((__m128i)__builtin_lsx_vldrepl_w((void const *)(_1), (_2))) ++ ++#define __lsx_vldrepl_d(/*void **/ _1, /*si9*/ _2) \ ++ ((__m128i)__builtin_lsx_vldrepl_d((void const *)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmskgez_b(__m128i _1) { ++ return (__m128i)__builtin_lsx_vmskgez_b((v16i8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vmsknz_b(__m128i _1) { ++ return (__m128i)__builtin_lsx_vmsknz_b((v16i8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vexth_h_b(__m128i _1) { ++ return (__m128i)__builtin_lsx_vexth_h_b((v16i8)_1); ++} ++ ++extern 
__inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vexth_w_h(__m128i _1) { ++ return (__m128i)__builtin_lsx_vexth_w_h((v8i16)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vexth_d_w(__m128i _1) { ++ return (__m128i)__builtin_lsx_vexth_d_w((v4i32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vexth_q_d(__m128i _1) { ++ return (__m128i)__builtin_lsx_vexth_q_d((v2i64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vexth_hu_bu(__m128i _1) { ++ return (__m128i)__builtin_lsx_vexth_hu_bu((v16u8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vexth_wu_hu(__m128i _1) { ++ return (__m128i)__builtin_lsx_vexth_wu_hu((v8u16)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vexth_du_wu(__m128i _1) { ++ return (__m128i)__builtin_lsx_vexth_du_wu((v4u32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vexth_qu_du(__m128i _1) { ++ return (__m128i)__builtin_lsx_vexth_qu_du((v2u64)_1); ++} ++ ++#define __lsx_vrotri_b(/*__m128i*/ _1, /*ui3*/ _2) \ ++ ((__m128i)__builtin_lsx_vrotri_b((v16i8)(_1), (_2))) ++ ++#define __lsx_vrotri_h(/*__m128i*/ _1, /*ui4*/ _2) \ ++ ((__m128i)__builtin_lsx_vrotri_h((v8i16)(_1), (_2))) ++ ++#define __lsx_vrotri_w(/*__m128i*/ _1, /*ui5*/ _2) \ ++ ((__m128i)__builtin_lsx_vrotri_w((v4i32)(_1), (_2))) ++ ++#define __lsx_vrotri_d(/*__m128i*/ _1, /*ui6*/ _2) \ ++ ((__m128i)__builtin_lsx_vrotri_d((v2i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vextl_q_d(__m128i _1) { ++ return (__m128i)__builtin_lsx_vextl_q_d((v2i64)_1); ++} ++ ++#define __lsx_vsrlni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrlni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) ++ ++#define __lsx_vsrlni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrlni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) ++ ++#define __lsx_vsrlni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrlni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) ++ ++#define __lsx_vsrlni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrlni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) ++ ++#define __lsx_vsrlrni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrlrni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) ++ ++#define __lsx_vsrlrni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrlrni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) ++ ++#define __lsx_vsrlrni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrlrni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) ++ ++#define __lsx_vsrlrni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrlrni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) ++ ++#define __lsx_vssrlni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) ++ ++#define __lsx_vssrlni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) ++ ++#define __lsx_vssrlni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ 
((__m128i)__builtin_lsx_vssrlni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) ++ ++#define __lsx_vssrlni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) ++ ++#define __lsx_vssrlni_bu_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlni_bu_h((v16u8)(_1), (v16i8)(_2), (_3))) ++ ++#define __lsx_vssrlni_hu_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlni_hu_w((v8u16)(_1), (v8i16)(_2), (_3))) ++ ++#define __lsx_vssrlni_wu_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlni_wu_d((v4u32)(_1), (v4i32)(_2), (_3))) ++ ++#define __lsx_vssrlni_du_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlni_du_q((v2u64)(_1), (v2i64)(_2), (_3))) ++ ++#define __lsx_vssrlrni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlrni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) ++ ++#define __lsx_vssrlrni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlrni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) ++ ++#define __lsx_vssrlrni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlrni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) ++ ++#define __lsx_vssrlrni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlrni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) ++ ++#define __lsx_vssrlrni_bu_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlrni_bu_h((v16u8)(_1), (v16i8)(_2), (_3))) ++ ++#define __lsx_vssrlrni_hu_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlrni_hu_w((v8u16)(_1), (v8i16)(_2), (_3))) ++ ++#define __lsx_vssrlrni_wu_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlrni_wu_d((v4u32)(_1), (v4i32)(_2), (_3))) ++ ++#define __lsx_vssrlrni_du_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrlrni_du_q((v2u64)(_1), (v2i64)(_2), (_3))) ++ ++#define __lsx_vsrani_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrani_b_h((v16i8)(_1), (v16i8)(_2), (_3))) ++ ++#define __lsx_vsrani_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrani_h_w((v8i16)(_1), (v8i16)(_2), (_3))) ++ ++#define __lsx_vsrani_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrani_w_d((v4i32)(_1), (v4i32)(_2), (_3))) ++ ++#define __lsx_vsrani_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrani_d_q((v2i64)(_1), (v2i64)(_2), (_3))) ++ ++#define __lsx_vsrarni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrarni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) ++ ++#define __lsx_vsrarni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrarni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) ++ ++#define __lsx_vsrarni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrarni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) ++ ++#define __lsx_vsrarni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vsrarni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) ++ ++#define __lsx_vssrani_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrani_b_h((v16i8)(_1), (v16i8)(_2), (_3))) ++ ++#define __lsx_vssrani_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrani_h_w((v8i16)(_1), (v8i16)(_2), (_3))) ++ ++#define 
__lsx_vssrani_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrani_w_d((v4i32)(_1), (v4i32)(_2), (_3))) ++ ++#define __lsx_vssrani_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrani_d_q((v2i64)(_1), (v2i64)(_2), (_3))) ++ ++#define __lsx_vssrani_bu_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrani_bu_h((v16u8)(_1), (v16i8)(_2), (_3))) ++ ++#define __lsx_vssrani_hu_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrani_hu_w((v8u16)(_1), (v8i16)(_2), (_3))) ++ ++#define __lsx_vssrani_wu_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrani_wu_d((v4u32)(_1), (v4i32)(_2), (_3))) ++ ++#define __lsx_vssrani_du_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrani_du_q((v2u64)(_1), (v2i64)(_2), (_3))) ++ ++#define __lsx_vssrarni_b_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrarni_b_h((v16i8)(_1), (v16i8)(_2), (_3))) ++ ++#define __lsx_vssrarni_h_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrarni_h_w((v8i16)(_1), (v8i16)(_2), (_3))) ++ ++#define __lsx_vssrarni_w_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrarni_w_d((v4i32)(_1), (v4i32)(_2), (_3))) ++ ++#define __lsx_vssrarni_d_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrarni_d_q((v2i64)(_1), (v2i64)(_2), (_3))) ++ ++#define __lsx_vssrarni_bu_h(/*__m128i*/ _1, /*__m128i*/ _2, /*ui4*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrarni_bu_h((v16u8)(_1), (v16i8)(_2), (_3))) ++ ++#define __lsx_vssrarni_hu_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui5*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrarni_hu_w((v8u16)(_1), (v8i16)(_2), (_3))) ++ ++#define __lsx_vssrarni_wu_d(/*__m128i*/ _1, /*__m128i*/ _2, /*ui6*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrarni_wu_d((v4u32)(_1), (v4i32)(_2), (_3))) ++ ++#define __lsx_vssrarni_du_q(/*__m128i*/ _1, /*__m128i*/ _2, /*ui7*/ _3) \ ++ ((__m128i)__builtin_lsx_vssrarni_du_q((v2u64)(_1), (v2i64)(_2), (_3))) ++ ++#define __lsx_vpermi_w(/*__m128i*/ _1, /*__m128i*/ _2, /*ui8*/ _3) \ ++ ((__m128i)__builtin_lsx_vpermi_w((v4i32)(_1), (v4i32)(_2), (_3))) ++ ++#define __lsx_vld(/*void **/ _1, /*si12*/ _2) \ ++ ((__m128i)__builtin_lsx_vld((void const *)(_1), (_2))) ++ ++#define __lsx_vst(/*__m128i*/ _1, /*void **/ _2, /*si12*/ _3) \ ++ ((void)__builtin_lsx_vst((v16i8)(_1), (void *)(_2), (_3))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrlrn_b_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrlrn_b_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrlrn_h_w(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrlrn_h_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrlrn_w_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrlrn_w_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrln_b_h(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrln_b_h((v8i16)_1, (v8i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrln_h_w(__m128i _1, __m128i _2) { ++ return 
(__m128i)__builtin_lsx_vssrln_h_w((v4i32)_1, (v4i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vssrln_w_d(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vssrln_w_d((v2i64)_1, (v2i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vorn_v(__m128i _1, __m128i _2) { ++ return (__m128i)__builtin_lsx_vorn_v((v16i8)_1, (v16i8)_2); ++} ++ ++#define __lsx_vldi(/*i13*/ _1) ((__m128i)__builtin_lsx_vldi((_1))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vshuf_b(__m128i _1, __m128i _2, __m128i _3) { ++ return (__m128i)__builtin_lsx_vshuf_b((v16i8)_1, (v16i8)_2, (v16i8)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vldx(void const *_1, long int _2) { ++ return (__m128i)__builtin_lsx_vldx((void const *)_1, (long int)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) void ++ __lsx_vstx(__m128i _1, void *_2, long int _3) { ++ return (void)__builtin_lsx_vstx((v16i8)_1, (void *)_2, (long int)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vextl_qu_du(__m128i _1) { ++ return (__m128i)__builtin_lsx_vextl_qu_du((v2u64)_1); ++} ++ ++#define __lsx_bnz_b(/*__m128i*/ _1) ((int)__builtin_lsx_bnz_b((v16u8)(_1))) ++ ++#define __lsx_bnz_d(/*__m128i*/ _1) ((int)__builtin_lsx_bnz_d((v2u64)(_1))) ++ ++#define __lsx_bnz_h(/*__m128i*/ _1) ((int)__builtin_lsx_bnz_h((v8u16)(_1))) ++ ++#define __lsx_bnz_v(/*__m128i*/ _1) ((int)__builtin_lsx_bnz_v((v16u8)(_1))) ++ ++#define __lsx_bnz_w(/*__m128i*/ _1) ((int)__builtin_lsx_bnz_w((v4u32)(_1))) ++ ++#define __lsx_bz_b(/*__m128i*/ _1) ((int)__builtin_lsx_bz_b((v16u8)(_1))) ++ ++#define __lsx_bz_d(/*__m128i*/ _1) ((int)__builtin_lsx_bz_d((v2u64)(_1))) ++ ++#define __lsx_bz_h(/*__m128i*/ _1) ((int)__builtin_lsx_bz_h((v8u16)(_1))) ++ ++#define __lsx_bz_v(/*__m128i*/ _1) ((int)__builtin_lsx_bz_v((v16u8)(_1))) ++ ++#define __lsx_bz_w(/*__m128i*/ _1) ((int)__builtin_lsx_bz_w((v4u32)(_1))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_caf_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_caf_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_caf_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_caf_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_ceq_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_ceq_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_ceq_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_ceq_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cle_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cle_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cle_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cle_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_clt_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_clt_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_clt_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_clt_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cne_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cne_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cne_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cne_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cor_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cor_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cor_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cor_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cueq_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cueq_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cueq_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cueq_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cule_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cule_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cule_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cule_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cult_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cult_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cult_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cult_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cun_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cun_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cune_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cune_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cune_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cune_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_cun_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_cun_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 
__m128i ++ __lsx_vfcmp_saf_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_saf_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_saf_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_saf_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_seq_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_seq_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_seq_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_seq_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sle_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sle_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sle_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sle_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_slt_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_slt_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_slt_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_slt_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sne_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sne_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sne_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sne_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sor_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sor_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sor_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sor_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sueq_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sueq_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sueq_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sueq_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sule_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sule_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sule_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sule_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sult_d(__m128d _1, __m128d _2) { ++ return 
(__m128i)__builtin_lsx_vfcmp_sult_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sult_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sult_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sun_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sun_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sune_d(__m128d _1, __m128d _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sune_d((v2f64)_1, (v2f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sune_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sune_s((v4f32)_1, (v4f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m128i ++ __lsx_vfcmp_sun_s(__m128 _1, __m128 _2) { ++ return (__m128i)__builtin_lsx_vfcmp_sun_s((v4f32)_1, (v4f32)_2); ++} ++ ++#define __lsx_vrepli_b(/*si10*/ _1) ((__m128i)__builtin_lsx_vrepli_b((_1))) ++ ++#define __lsx_vrepli_d(/*si10*/ _1) ((__m128i)__builtin_lsx_vrepli_d((_1))) ++ ++#define __lsx_vrepli_h(/*si10*/ _1) ((__m128i)__builtin_lsx_vrepli_h((_1))) ++ ++#define __lsx_vrepli_w(/*si10*/ _1) ((__m128i)__builtin_lsx_vrepli_w((_1))) ++ ++#endif /* defined(__loongarch_sx) */ ++#endif /* _LOONGSON_SXINTRIN_H */ +diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp +index a8416bf4de92..d1b015502725 100644 +--- a/clang/lib/Sema/SemaChecking.cpp ++++ b/clang/lib/Sema/SemaChecking.cpp +@@ -3826,6 +3826,7 @@ bool Sema::CheckLoongArchBuiltinFunctionCall(const TargetInfo &TI, + switch (BuiltinID) { + default: + break; ++ // Basic intrinsics. + case LoongArch::BI__builtin_loongarch_cacop_d: + case LoongArch::BI__builtin_loongarch_cacop_w: { + SemaBuiltinConstantArgRange(TheCall, 0, 0, llvm::maxUIntN(5)); +@@ -3854,8 +3855,234 @@ bool Sema::CheckLoongArchBuiltinFunctionCall(const TargetInfo &TI, + case LoongArch::BI__builtin_loongarch_movfcsr2gr: + case LoongArch::BI__builtin_loongarch_movgr2fcsr: + return SemaBuiltinConstantArgRange(TheCall, 0, 0, llvm::maxUIntN(2)); +- } + ++ // LSX intrinsics. 
++ case LoongArch::BI__builtin_lsx_vbitclri_b: ++ case LoongArch::BI__builtin_lsx_vbitrevi_b: ++ case LoongArch::BI__builtin_lsx_vbitseti_b: ++ case LoongArch::BI__builtin_lsx_vsat_b: ++ case LoongArch::BI__builtin_lsx_vsat_bu: ++ case LoongArch::BI__builtin_lsx_vslli_b: ++ case LoongArch::BI__builtin_lsx_vsrai_b: ++ case LoongArch::BI__builtin_lsx_vsrari_b: ++ case LoongArch::BI__builtin_lsx_vsrli_b: ++ case LoongArch::BI__builtin_lsx_vsllwil_h_b: ++ case LoongArch::BI__builtin_lsx_vsllwil_hu_bu: ++ case LoongArch::BI__builtin_lsx_vrotri_b: ++ case LoongArch::BI__builtin_lsx_vsrlri_b: ++ return SemaBuiltinConstantArgRange(TheCall, 1, 0, 7); ++ case LoongArch::BI__builtin_lsx_vbitclri_h: ++ case LoongArch::BI__builtin_lsx_vbitrevi_h: ++ case LoongArch::BI__builtin_lsx_vbitseti_h: ++ case LoongArch::BI__builtin_lsx_vsat_h: ++ case LoongArch::BI__builtin_lsx_vsat_hu: ++ case LoongArch::BI__builtin_lsx_vslli_h: ++ case LoongArch::BI__builtin_lsx_vsrai_h: ++ case LoongArch::BI__builtin_lsx_vsrari_h: ++ case LoongArch::BI__builtin_lsx_vsrli_h: ++ case LoongArch::BI__builtin_lsx_vsllwil_w_h: ++ case LoongArch::BI__builtin_lsx_vsllwil_wu_hu: ++ case LoongArch::BI__builtin_lsx_vrotri_h: ++ case LoongArch::BI__builtin_lsx_vsrlri_h: ++ return SemaBuiltinConstantArgRange(TheCall, 1, 0, 15); ++ case LoongArch::BI__builtin_lsx_vssrarni_b_h: ++ case LoongArch::BI__builtin_lsx_vssrarni_bu_h: ++ case LoongArch::BI__builtin_lsx_vssrani_b_h: ++ case LoongArch::BI__builtin_lsx_vssrani_bu_h: ++ case LoongArch::BI__builtin_lsx_vsrarni_b_h: ++ case LoongArch::BI__builtin_lsx_vsrlni_b_h: ++ case LoongArch::BI__builtin_lsx_vsrlrni_b_h: ++ case LoongArch::BI__builtin_lsx_vssrlni_b_h: ++ case LoongArch::BI__builtin_lsx_vssrlni_bu_h: ++ case LoongArch::BI__builtin_lsx_vssrlrni_b_h: ++ case LoongArch::BI__builtin_lsx_vssrlrni_bu_h: ++ case LoongArch::BI__builtin_lsx_vsrani_b_h: ++ return SemaBuiltinConstantArgRange(TheCall, 2, 0, 15); ++ case LoongArch::BI__builtin_lsx_vslei_bu: ++ case LoongArch::BI__builtin_lsx_vslei_hu: ++ case LoongArch::BI__builtin_lsx_vslei_wu: ++ case LoongArch::BI__builtin_lsx_vslei_du: ++ case LoongArch::BI__builtin_lsx_vslti_bu: ++ case LoongArch::BI__builtin_lsx_vslti_hu: ++ case LoongArch::BI__builtin_lsx_vslti_wu: ++ case LoongArch::BI__builtin_lsx_vslti_du: ++ case LoongArch::BI__builtin_lsx_vmaxi_bu: ++ case LoongArch::BI__builtin_lsx_vmaxi_hu: ++ case LoongArch::BI__builtin_lsx_vmaxi_wu: ++ case LoongArch::BI__builtin_lsx_vmaxi_du: ++ case LoongArch::BI__builtin_lsx_vmini_bu: ++ case LoongArch::BI__builtin_lsx_vmini_hu: ++ case LoongArch::BI__builtin_lsx_vmini_wu: ++ case LoongArch::BI__builtin_lsx_vmini_du: ++ case LoongArch::BI__builtin_lsx_vaddi_bu: ++ case LoongArch::BI__builtin_lsx_vaddi_hu: ++ case LoongArch::BI__builtin_lsx_vaddi_wu: ++ case LoongArch::BI__builtin_lsx_vaddi_du: ++ case LoongArch::BI__builtin_lsx_vbitclri_w: ++ case LoongArch::BI__builtin_lsx_vbitrevi_w: ++ case LoongArch::BI__builtin_lsx_vbitseti_w: ++ case LoongArch::BI__builtin_lsx_vsat_w: ++ case LoongArch::BI__builtin_lsx_vsat_wu: ++ case LoongArch::BI__builtin_lsx_vslli_w: ++ case LoongArch::BI__builtin_lsx_vsrai_w: ++ case LoongArch::BI__builtin_lsx_vsrari_w: ++ case LoongArch::BI__builtin_lsx_vsrli_w: ++ case LoongArch::BI__builtin_lsx_vsllwil_d_w: ++ case LoongArch::BI__builtin_lsx_vsllwil_du_wu: ++ case LoongArch::BI__builtin_lsx_vsrlri_w: ++ case LoongArch::BI__builtin_lsx_vrotri_w: ++ case LoongArch::BI__builtin_lsx_vsubi_bu: ++ case LoongArch::BI__builtin_lsx_vsubi_hu: ++ case 
LoongArch::BI__builtin_lsx_vbsrl_v: ++ case LoongArch::BI__builtin_lsx_vbsll_v: ++ case LoongArch::BI__builtin_lsx_vsubi_wu: ++ case LoongArch::BI__builtin_lsx_vsubi_du: ++ return SemaBuiltinConstantArgRange(TheCall, 1, 0, 31); ++ case LoongArch::BI__builtin_lsx_vssrarni_h_w: ++ case LoongArch::BI__builtin_lsx_vssrarni_hu_w: ++ case LoongArch::BI__builtin_lsx_vssrani_h_w: ++ case LoongArch::BI__builtin_lsx_vssrani_hu_w: ++ case LoongArch::BI__builtin_lsx_vsrarni_h_w: ++ case LoongArch::BI__builtin_lsx_vsrani_h_w: ++ case LoongArch::BI__builtin_lsx_vfrstpi_b: ++ case LoongArch::BI__builtin_lsx_vfrstpi_h: ++ case LoongArch::BI__builtin_lsx_vsrlni_h_w: ++ case LoongArch::BI__builtin_lsx_vsrlrni_h_w: ++ case LoongArch::BI__builtin_lsx_vssrlni_h_w: ++ case LoongArch::BI__builtin_lsx_vssrlni_hu_w: ++ case LoongArch::BI__builtin_lsx_vssrlrni_h_w: ++ case LoongArch::BI__builtin_lsx_vssrlrni_hu_w: ++ return SemaBuiltinConstantArgRange(TheCall, 2, 0, 31); ++ case LoongArch::BI__builtin_lsx_vbitclri_d: ++ case LoongArch::BI__builtin_lsx_vbitrevi_d: ++ case LoongArch::BI__builtin_lsx_vbitseti_d: ++ case LoongArch::BI__builtin_lsx_vsat_d: ++ case LoongArch::BI__builtin_lsx_vsat_du: ++ case LoongArch::BI__builtin_lsx_vslli_d: ++ case LoongArch::BI__builtin_lsx_vsrai_d: ++ case LoongArch::BI__builtin_lsx_vsrli_d: ++ case LoongArch::BI__builtin_lsx_vsrari_d: ++ case LoongArch::BI__builtin_lsx_vrotri_d: ++ case LoongArch::BI__builtin_lsx_vsrlri_d: ++ return SemaBuiltinConstantArgRange(TheCall, 1, 0, 63); ++ case LoongArch::BI__builtin_lsx_vssrarni_w_d: ++ case LoongArch::BI__builtin_lsx_vssrarni_wu_d: ++ case LoongArch::BI__builtin_lsx_vssrani_w_d: ++ case LoongArch::BI__builtin_lsx_vssrani_wu_d: ++ case LoongArch::BI__builtin_lsx_vsrarni_w_d: ++ case LoongArch::BI__builtin_lsx_vsrlni_w_d: ++ case LoongArch::BI__builtin_lsx_vsrlrni_w_d: ++ case LoongArch::BI__builtin_lsx_vssrlni_w_d: ++ case LoongArch::BI__builtin_lsx_vssrlni_wu_d: ++ case LoongArch::BI__builtin_lsx_vssrlrni_w_d: ++ case LoongArch::BI__builtin_lsx_vssrlrni_wu_d: ++ case LoongArch::BI__builtin_lsx_vsrani_w_d: ++ return SemaBuiltinConstantArgRange(TheCall, 2, 0, 63); ++ case LoongArch::BI__builtin_lsx_vssrarni_d_q: ++ case LoongArch::BI__builtin_lsx_vssrarni_du_q: ++ case LoongArch::BI__builtin_lsx_vssrani_d_q: ++ case LoongArch::BI__builtin_lsx_vssrani_du_q: ++ case LoongArch::BI__builtin_lsx_vsrarni_d_q: ++ case LoongArch::BI__builtin_lsx_vssrlni_d_q: ++ case LoongArch::BI__builtin_lsx_vssrlni_du_q: ++ case LoongArch::BI__builtin_lsx_vssrlrni_d_q: ++ case LoongArch::BI__builtin_lsx_vssrlrni_du_q: ++ case LoongArch::BI__builtin_lsx_vsrani_d_q: ++ case LoongArch::BI__builtin_lsx_vsrlrni_d_q: ++ case LoongArch::BI__builtin_lsx_vsrlni_d_q: ++ return SemaBuiltinConstantArgRange(TheCall, 2, 0, 127); ++ case LoongArch::BI__builtin_lsx_vseqi_b: ++ case LoongArch::BI__builtin_lsx_vseqi_h: ++ case LoongArch::BI__builtin_lsx_vseqi_w: ++ case LoongArch::BI__builtin_lsx_vseqi_d: ++ case LoongArch::BI__builtin_lsx_vslti_b: ++ case LoongArch::BI__builtin_lsx_vslti_h: ++ case LoongArch::BI__builtin_lsx_vslti_w: ++ case LoongArch::BI__builtin_lsx_vslti_d: ++ case LoongArch::BI__builtin_lsx_vslei_b: ++ case LoongArch::BI__builtin_lsx_vslei_h: ++ case LoongArch::BI__builtin_lsx_vslei_w: ++ case LoongArch::BI__builtin_lsx_vslei_d: ++ case LoongArch::BI__builtin_lsx_vmaxi_b: ++ case LoongArch::BI__builtin_lsx_vmaxi_h: ++ case LoongArch::BI__builtin_lsx_vmaxi_w: ++ case LoongArch::BI__builtin_lsx_vmaxi_d: ++ case LoongArch::BI__builtin_lsx_vmini_b: ++ case 
LoongArch::BI__builtin_lsx_vmini_h: ++ case LoongArch::BI__builtin_lsx_vmini_w: ++ case LoongArch::BI__builtin_lsx_vmini_d: ++ return SemaBuiltinConstantArgRange(TheCall, 1, -16, 15); ++ case LoongArch::BI__builtin_lsx_vandi_b: ++ case LoongArch::BI__builtin_lsx_vnori_b: ++ case LoongArch::BI__builtin_lsx_vori_b: ++ case LoongArch::BI__builtin_lsx_vshuf4i_b: ++ case LoongArch::BI__builtin_lsx_vshuf4i_h: ++ case LoongArch::BI__builtin_lsx_vshuf4i_w: ++ case LoongArch::BI__builtin_lsx_vxori_b: ++ return SemaBuiltinConstantArgRange(TheCall, 1, 0, 255); ++ case LoongArch::BI__builtin_lsx_vbitseli_b: ++ case LoongArch::BI__builtin_lsx_vshuf4i_d: ++ case LoongArch::BI__builtin_lsx_vextrins_b: ++ case LoongArch::BI__builtin_lsx_vextrins_h: ++ case LoongArch::BI__builtin_lsx_vextrins_w: ++ case LoongArch::BI__builtin_lsx_vextrins_d: ++ case LoongArch::BI__builtin_lsx_vpermi_w: ++ return SemaBuiltinConstantArgRange(TheCall, 2, 0, 255); ++ case LoongArch::BI__builtin_lsx_vpickve2gr_b: ++ case LoongArch::BI__builtin_lsx_vpickve2gr_bu: ++ case LoongArch::BI__builtin_lsx_vreplvei_b: ++ return SemaBuiltinConstantArgRange(TheCall, 1, 0, 15); ++ case LoongArch::BI__builtin_lsx_vinsgr2vr_b: ++ return SemaBuiltinConstantArgRange(TheCall, 2, 0, 15); ++ case LoongArch::BI__builtin_lsx_vpickve2gr_h: ++ case LoongArch::BI__builtin_lsx_vpickve2gr_hu: ++ case LoongArch::BI__builtin_lsx_vreplvei_h: ++ return SemaBuiltinConstantArgRange(TheCall, 1, 0, 7); ++ case LoongArch::BI__builtin_lsx_vinsgr2vr_h: ++ return SemaBuiltinConstantArgRange(TheCall, 2, 0, 7); ++ case LoongArch::BI__builtin_lsx_vpickve2gr_w: ++ case LoongArch::BI__builtin_lsx_vpickve2gr_wu: ++ case LoongArch::BI__builtin_lsx_vreplvei_w: ++ return SemaBuiltinConstantArgRange(TheCall, 1, 0, 3); ++ case LoongArch::BI__builtin_lsx_vinsgr2vr_w: ++ return SemaBuiltinConstantArgRange(TheCall, 2, 0, 3); ++ case LoongArch::BI__builtin_lsx_vpickve2gr_d: ++ case LoongArch::BI__builtin_lsx_vpickve2gr_du: ++ case LoongArch::BI__builtin_lsx_vreplvei_d: ++ return SemaBuiltinConstantArgRange(TheCall, 1, 0, 1); ++ case LoongArch::BI__builtin_lsx_vinsgr2vr_d: ++ return SemaBuiltinConstantArgRange(TheCall, 2, 0, 1); ++ case LoongArch::BI__builtin_lsx_vstelm_b: ++ return SemaBuiltinConstantArgRange(TheCall, 2, -128, 127) || ++ SemaBuiltinConstantArgRange(TheCall, 3, 0, 15); ++ case LoongArch::BI__builtin_lsx_vstelm_h: ++ return SemaBuiltinConstantArgRange(TheCall, 2, -256, 254) || ++ SemaBuiltinConstantArgRange(TheCall, 3, 0, 7); ++ case LoongArch::BI__builtin_lsx_vstelm_w: ++ return SemaBuiltinConstantArgRange(TheCall, 2, -512, 508) || ++ SemaBuiltinConstantArgRange(TheCall, 3, 0, 3); ++ case LoongArch::BI__builtin_lsx_vstelm_d: ++ return SemaBuiltinConstantArgRange(TheCall, 2, -1024, 1016) || ++ SemaBuiltinConstantArgRange(TheCall, 3, 0, 1); ++ case LoongArch::BI__builtin_lsx_vldrepl_b: ++ case LoongArch::BI__builtin_lsx_vld: ++ return SemaBuiltinConstantArgRange(TheCall, 1, -2048, 2047); ++ case LoongArch::BI__builtin_lsx_vldrepl_h: ++ return SemaBuiltinConstantArgRange(TheCall, 1, -2048, 2046); ++ case LoongArch::BI__builtin_lsx_vldrepl_w: ++ return SemaBuiltinConstantArgRange(TheCall, 1, -2048, 2044); ++ case LoongArch::BI__builtin_lsx_vldrepl_d: ++ return SemaBuiltinConstantArgRange(TheCall, 1, -2048, 2040); ++ case LoongArch::BI__builtin_lsx_vst: ++ return SemaBuiltinConstantArgRange(TheCall, 2, -2048, 2047); ++ case LoongArch::BI__builtin_lsx_vldi: ++ return SemaBuiltinConstantArgRange(TheCall, 0, -4096, 4095); ++ case LoongArch::BI__builtin_lsx_vrepli_b: ++ 
case LoongArch::BI__builtin_lsx_vrepli_h: ++ case LoongArch::BI__builtin_lsx_vrepli_w: ++ case LoongArch::BI__builtin_lsx_vrepli_d: ++ return SemaBuiltinConstantArgRange(TheCall, 0, -512, 511); ++ } + return false; + } + +-- +2.20.1 + + +From c2191962d5c7fb671f1caa7d868cf985ecb1ee5b Mon Sep 17 00:00:00 2001 +From: licongtian <licongtian@loongson.cn> +Date: Wed, 25 Oct 2023 17:44:06 +0800 +Subject: [PATCH 5/8] [Clang][LoongArch] Support the builtin functions for LASX + +This patch does the following work: +- Define the builtin functions for LASX +- Add the header files lasxintrin.h + +(cherry picked from commit a4005e729c8d9dba9ba19f3ce4ad5b60e64dc467) +--- + .../include/clang/Basic/BuiltinsLoongArch.def | 3 + + .../clang/Basic/BuiltinsLoongArchLASX.def | 982 +++++ + clang/lib/Headers/CMakeLists.txt | 1 + + clang/lib/Headers/lasxintrin.h | 3860 +++++++++++++++++ + clang/lib/Sema/SemaChecking.cpp | 227 + + 5 files changed, 5073 insertions(+) + create mode 100644 clang/include/clang/Basic/BuiltinsLoongArchLASX.def + create mode 100644 clang/lib/Headers/lasxintrin.h + +diff --git a/clang/include/clang/Basic/BuiltinsLoongArch.def b/clang/include/clang/Basic/BuiltinsLoongArch.def +index 9ec19c31095a..95359a3fdc71 100644 +--- a/clang/include/clang/Basic/BuiltinsLoongArch.def ++++ b/clang/include/clang/Basic/BuiltinsLoongArch.def +@@ -21,5 +21,8 @@ + // Definition of LSX builtins. + #include "clang/Basic/BuiltinsLoongArchLSX.def" + ++// Definition of LASX builtins. ++#include "clang/Basic/BuiltinsLoongArchLASX.def" ++ + #undef BUILTIN + #undef TARGET_BUILTIN +diff --git a/clang/include/clang/Basic/BuiltinsLoongArchLASX.def b/clang/include/clang/Basic/BuiltinsLoongArchLASX.def +new file mode 100644 +index 000000000000..3de200f665b6 +--- /dev/null ++++ b/clang/include/clang/Basic/BuiltinsLoongArchLASX.def +@@ -0,0 +1,982 @@ ++//=BuiltinsLoongArchLASX.def - LoongArch Builtin function database -- C++ -*-=// ++// ++// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++// See https://llvm.org/LICENSE.txt for license information. ++// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++// ++//===----------------------------------------------------------------------===// ++// ++// This file defines the LoongArch-specific LASX builtin function database. ++// Users of this file must define the BUILTIN macro to make use of this ++// information.
++// ++//===----------------------------------------------------------------------===// ++ ++TARGET_BUILTIN(__builtin_lasx_xvadd_b, "V32cV32cV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvadd_h, "V16sV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvadd_w, "V8iV8iV8i", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvadd_d, "V4LLiV4LLiV4LLi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvadd_q, "V4LLiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsub_b, "V32cV32cV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsub_h, "V16sV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsub_w, "V8iV8iV8i", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsub_d, "V4LLiV4LLiV4LLi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsub_q, "V4LLiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvaddi_bu, "V32cV32cIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvaddi_hu, "V16sV16sIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvaddi_wu, "V8iV8iIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvaddi_du, "V4LLiV4LLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsubi_bu, "V32cV32cIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsubi_hu, "V16sV16sIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsubi_wu, "V8iV8iIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsubi_du, "V4LLiV4LLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvneg_b, "V32cV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvneg_h, "V16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvneg_w, "V8iV8i", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvneg_d, "V4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsadd_b, "V32ScV32ScV32Sc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsadd_h, "V16SsV16SsV16Ss", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsadd_w, "V8SiV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsadd_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsadd_bu, "V32UcV32UcV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsadd_hu, "V16UsV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsadd_wu, "V8UiV8UiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsadd_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvssub_b, "V32ScV32ScV32Sc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssub_h, "V16SsV16SsV16Ss", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssub_w, "V8SiV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssub_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvssub_bu, "V32UcV32UcV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssub_hu, "V16UsV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssub_wu, "V8UiV8UiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssub_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvhaddw_h_b, "V16SsV32ScV32Sc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvhaddw_w_h, "V8SiV16SsV16Ss", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvhaddw_d_w, "V4SLLiV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvhaddw_q_d, "V4LLiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvhaddw_hu_bu, "V16UsV32UcV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvhaddw_wu_hu, "V8UiV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvhaddw_du_wu, "V4ULLiV8UiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvhaddw_qu_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") ++ 
++TARGET_BUILTIN(__builtin_lasx_xvhsubw_h_b, "V16SsV32ScV32Sc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvhsubw_w_h, "V8SiV16SsV16Ss", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvhsubw_d_w, "V4SLLiV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvhsubw_q_d, "V4LLiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvhsubw_hu_bu, "V16UsV32UcV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvhsubw_wu_hu, "V8UiV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvhsubw_du_wu, "V4ULLiV8UiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvhsubw_qu_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvaddwev_h_b, "V16sV32cV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvaddwev_w_h, "V8SiV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvaddwev_d_w, "V4LLiV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvaddwev_q_d, "V4LLiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvaddwod_h_b, "V16sV32cV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvaddwod_w_h, "V8SiV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvaddwod_d_w, "V4LLiV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvaddwod_q_d, "V4LLiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsubwev_h_b, "V16sV32cV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsubwev_w_h, "V8SiV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsubwev_d_w, "V4LLiV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsubwev_q_d, "V4LLiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsubwod_h_b, "V16sV32cV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsubwod_w_h, "V8SiV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsubwod_d_w, "V4LLiV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsubwod_q_d, "V4LLiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvaddwev_h_bu, "V16sV32UcV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvaddwev_w_hu, "V8SiV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvaddwev_d_wu, "V4LLiV8UiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvaddwev_q_du, "V4LLiV4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvaddwod_h_bu, "V16sV32UcV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvaddwod_w_hu, "V8SiV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvaddwod_d_wu, "V4LLiV8UiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvaddwod_q_du, "V4LLiV4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsubwev_h_bu, "V16sV32UcV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsubwev_w_hu, "V8SiV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsubwev_d_wu, "V4LLiV8UiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsubwev_q_du, "V4LLiV4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsubwod_h_bu, "V16sV32UcV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsubwod_w_hu, "V8SiV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsubwod_d_wu, "V4LLiV8UiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsubwod_q_du, "V4LLiV4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvaddwev_h_bu_b, "V16sV32UcV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvaddwev_w_hu_h, "V8SiV16UsV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvaddwev_d_wu_w, "V4LLiV8UiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvaddwev_q_du_d, "V4LLiV4ULLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvaddwod_h_bu_b, 
"V16sV32UcV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvaddwod_w_hu_h, "V8SiV16UsV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvaddwod_d_wu_w, "V4LLiV8UiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvaddwod_q_du_d, "V4LLiV4ULLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvavg_b, "V32ScV32ScV32Sc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvavg_h, "V16SsV16SsV16Ss", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvavg_w, "V8SiV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvavg_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvavg_bu, "V32UcV32UcV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvavg_hu, "V16UsV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvavg_wu, "V8UiV8UiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvavg_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvavgr_b, "V32ScV32ScV32Sc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvavgr_h, "V16SsV16SsV16Ss", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvavgr_w, "V8SiV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvavgr_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvavgr_bu, "V32UcV32UcV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvavgr_hu, "V16UsV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvavgr_wu, "V8UiV8UiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvavgr_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvabsd_b, "V32ScV32ScV32Sc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvabsd_h, "V16SsV16SsV16Ss", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvabsd_w, "V8SiV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvabsd_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvabsd_bu, "V32UcV32UcV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvabsd_hu, "V16UsV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvabsd_wu, "V8UiV8UiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvabsd_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvadda_b, "V32ScV32ScV32Sc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvadda_h, "V16SsV16SsV16Ss", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvadda_w, "V8SiV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvadda_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvmax_b, "V32ScV32ScV32Sc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmax_h, "V16SsV16SsV16Ss", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmax_w, "V8SiV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmax_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvmaxi_b, "V32ScV32ScIi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmaxi_h, "V16SsV16SsIi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmaxi_w, "V8SiV8SiIi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmaxi_d, "V4SLLiV4SLLiIi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvmax_bu, "V32UcV32UcV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmax_hu, "V16UsV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmax_wu, "V8UiV8UiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmax_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvmaxi_bu, "V32UcV32UcIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmaxi_hu, "V16UsV16UsIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmaxi_wu, "V8UiV8UiIUi", "nc", "lasx") 
++TARGET_BUILTIN(__builtin_lasx_xvmaxi_du, "V4ULLiV4ULLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvmin_b, "V32ScV32ScV32Sc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmin_h, "V16SsV16SsV16Ss", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmin_w, "V8SiV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmin_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvmini_b, "V32ScV32ScIi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmini_h, "V16SsV16SsIi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmini_w, "V8SiV8SiIi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmini_d, "V4SLLiV4SLLiIi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvmin_bu, "V32UcV32UcV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmin_hu, "V16UsV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmin_wu, "V8UiV8UiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmin_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvmini_bu, "V32UcV32UcIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmini_hu, "V16UsV16UsIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmini_wu, "V8UiV8UiIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmini_du, "V4ULLiV4ULLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvmul_b, "V32ScV32ScV32Sc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmul_h, "V16SsV16SsV16Ss", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmul_w, "V8SiV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmul_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvmuh_b, "V32cV32cV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmuh_h, "V16sV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmuh_w, "V8iV8iV8i", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmuh_d, "V4LLiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvmuh_bu, "V32UcV32UcV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmuh_hu, "V16UsV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmuh_wu, "V8UiV8UiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmuh_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvmulwev_h_b, "V16sV32cV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmulwev_w_h, "V8SiV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmulwev_d_w, "V4LLiV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmulwev_q_d, "V4LLiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvmulwod_h_b, "V16sV32cV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmulwod_w_h, "V8SiV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmulwod_d_w, "V4LLiV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmulwod_q_d, "V4LLiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvmulwev_h_bu, "V16sV32UcV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmulwev_w_hu, "V8SiV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmulwev_d_wu, "V4LLiV8UiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmulwev_q_du, "V4LLiV4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvmulwod_h_bu, "V16sV32UcV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmulwod_w_hu, "V8SiV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmulwod_d_wu, "V4LLiV8UiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmulwod_q_du, "V4LLiV4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvmulwev_h_bu_b, "V16sV32UcV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmulwev_w_hu_h, 
"V8SiV16UsV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmulwev_d_wu_w, "V4LLiV8UiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmulwev_q_du_d, "V4LLiV4ULLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvmulwod_h_bu_b, "V16sV32UcV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmulwod_w_hu_h, "V8SiV16UsV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmulwod_d_wu_w, "V4LLiV8UiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmulwod_q_du_d, "V4LLiV4ULLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvmadd_b, "V32ScV32ScV32ScV32Sc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmadd_h, "V16SsV16SsV16SsV16Ss", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmadd_w, "V8SiV8SiV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmadd_d, "V4SLLiV4SLLiV4SLLiV4SLLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvmsub_b, "V32ScV32ScV32ScV32Sc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmsub_h, "V16SsV16SsV16SsV16Ss", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmsub_w, "V8SiV8SiV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmsub_d, "V4SLLiV4SLLiV4SLLiV4SLLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvmaddwev_h_b, "V16sV16sV32cV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmaddwev_w_h, "V8SiV8SiV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmaddwev_d_w, "V4LLiV4LLiV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmaddwev_q_d, "V4LLiV4LLiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvmaddwod_h_b, "V16sV16sV32cV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmaddwod_w_h, "V8SiV8SiV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmaddwod_d_w, "V4LLiV4LLiV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmaddwod_q_d, "V4LLiV4LLiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvmaddwev_h_bu, "V16UsV16UsV32UcV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmaddwev_w_hu, "V8UiV8UiV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmaddwev_d_wu, "V4ULLiV4ULLiV8UiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmaddwev_q_du, "V4ULLiV4ULLiV4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvmaddwod_h_bu, "V16UsV16UsV32UcV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmaddwod_w_hu, "V8UiV8UiV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmaddwod_d_wu, "V4ULLiV4ULLiV8UiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmaddwod_q_du, "V4ULLiV4ULLiV4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvmaddwev_h_bu_b, "V16sV16sV32UcV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmaddwev_w_hu_h, "V8SiV8SiV16UsV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmaddwev_d_wu_w, "V4LLiV4LLiV8UiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmaddwev_q_du_d, "V4LLiV4LLiV4ULLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvmaddwod_h_bu_b, "V16sV16sV32UcV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmaddwod_w_hu_h, "V8SiV8SiV16UsV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmaddwod_d_wu_w, "V4LLiV4LLiV8UiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmaddwod_q_du_d, "V4LLiV4LLiV4ULLiV4LLi", "nc", "lasx") ++ ++ ++TARGET_BUILTIN(__builtin_lasx_xvdiv_b, "V32ScV32ScV32Sc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvdiv_h, "V16SsV16SsV16Ss", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvdiv_w, "V8SiV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvdiv_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") ++ 
++TARGET_BUILTIN(__builtin_lasx_xvdiv_bu, "V32UcV32UcV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvdiv_hu, "V16UsV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvdiv_wu, "V8UiV8UiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvdiv_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvmod_b, "V32ScV32ScV32Sc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmod_h, "V16SsV16SsV16Ss", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmod_w, "V8SiV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmod_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvmod_bu, "V32UcV32UcV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmod_hu, "V16UsV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmod_wu, "V8UiV8UiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmod_du, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsat_b, "V32ScV32ScIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsat_h, "V16SsV16SsIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsat_w, "V8SiV8SiIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsat_d, "V4SLLiV4SLLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsat_bu, "V32UcV32UcIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsat_hu, "V16UsV16UsIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsat_wu, "V8UiV8UiIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsat_du, "V4ULLiV4ULLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvexth_h_b, "V16sV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvexth_w_h, "V8SiV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvexth_d_w, "V4LLiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvexth_q_d, "V4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvexth_hu_bu, "V16UsV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvexth_wu_hu, "V8UiV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvexth_du_wu, "V4ULLiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvexth_qu_du, "V4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_vext2xv_h_b, "V16sV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_vext2xv_w_b, "V8SiV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_vext2xv_d_b, "V4LLiV32c", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_vext2xv_w_h, "V8SiV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_vext2xv_d_h, "V4LLiV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_vext2xv_d_w, "V4LLiV8Si", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_vext2xv_hu_bu, "V16sV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_vext2xv_wu_bu, "V8SiV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_vext2xv_du_bu, "V4LLiV32c", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_vext2xv_wu_hu, "V8SiV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_vext2xv_du_hu, "V4LLiV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_vext2xv_du_wu, "V4LLiV8Si", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsigncov_b, "V32ScV32ScV32Sc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsigncov_h, "V16SsV16SsV16Ss", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsigncov_w, "V8SiV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsigncov_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvmskltz_b, "V32cV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmskltz_h, "V16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmskltz_w, "V8iV8i", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmskltz_d, "V4LLiV4LLi", "nc", "lasx") 
++ ++TARGET_BUILTIN(__builtin_lasx_xvmskgez_b, "V32cV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvmsknz_b, "V16sV16s", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvldi, "V4LLiIi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvrepli_b, "V32cIi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvrepli_h, "V16sIi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvrepli_w, "V8iIi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvrepli_d, "V4LLiIi", "nc", "lasx") ++ ++ ++TARGET_BUILTIN(__builtin_lasx_xvand_v, "V32UcV32UcV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvor_v, "V32UcV32UcV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvxor_v, "V32cV32cV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvnor_v, "V32UcV32UcV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvandn_v, "V32UcV32UcV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvorn_v, "V32ScV32ScV32Sc", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvandi_b, "V32UcV32UcIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvori_b, "V32UcV32UcIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvxori_b, "V32UcV32UcIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvnori_b, "V32UcV32UcIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsll_b, "V32cV32cV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsll_h, "V16sV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsll_w, "V8iV8iV8i", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsll_d, "V4LLiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvslli_b, "V32cV32cIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvslli_h, "V16sV16sIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvslli_w, "V8iV8iIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvslli_d, "V4LLiV4LLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsrl_b, "V32cV32cV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrl_h, "V16sV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrl_w, "V8iV8iV8i", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrl_d, "V4LLiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsrli_b, "V32cV32cIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrli_h, "V16sV16sIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrli_w, "V8iV8iIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrli_d, "V4LLiV4LLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsra_b, "V32cV32cV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsra_h, "V16sV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsra_w, "V8iV8iV8i", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsra_d, "V4LLiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsrai_b, "V32cV32cIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrai_h, "V16sV16sIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrai_w, "V8iV8iIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrai_d, "V4LLiV4LLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvrotr_b, "V32cV32cV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvrotr_h, "V16sV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvrotr_w, "V8iV8iV8i", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvrotr_d, "V4LLiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvrotri_b, "V32cV32cIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvrotri_h, "V16sV16sIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvrotri_w, "V8iV8iIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvrotri_d, "V4LLiV4LLiIUi", "nc", "lasx") ++ 
++TARGET_BUILTIN(__builtin_lasx_xvsllwil_h_b, "V16sV32cIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsllwil_w_h, "V8SiV16sIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsllwil_d_w, "V4LLiV8SiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvextl_q_d, "V4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsllwil_hu_bu, "V16UsV32UcIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsllwil_wu_hu, "V8UiV16UsIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsllwil_du_wu, "V4ULLiV8UiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvextl_qu_du, "V4LLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsrlr_b, "V32cV32cV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrlr_h, "V16sV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrlr_w, "V8iV8iV8i", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrlr_d, "V4LLiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsrlri_b, "V32cV32cIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrlri_h, "V16sV16sIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrlri_w, "V8iV8iIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrlri_d, "V4LLiV4LLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsrar_b, "V32cV32cV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrar_h, "V16sV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrar_w, "V8iV8iV8i", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrar_d, "V4LLiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsrari_b, "V32cV32cIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrari_h, "V16sV16sIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrari_w, "V8iV8iIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrari_d, "V4LLiV4LLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsrln_b_h, "V32ScV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrln_h_w, "V16sV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrln_w_d, "V8SiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsran_b_h, "V32ScV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsran_h_w, "V16sV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsran_w_d, "V8SiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsrlni_b_h, "V32cV32cV32cIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrlni_h_w, "V16sV16sV16sIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrlni_w_d, "V8iV8iV8iIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrlni_d_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsrani_b_h, "V32cV32cV32cIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrani_h_w, "V16sV16sV16sIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrani_w_d, "V8iV8iV8iIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrani_d_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsrlrn_b_h, "V32ScV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrlrn_h_w, "V16sV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrlrn_w_d, "V8SiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsrarn_b_h, "V32ScV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrarn_h_w, "V16sV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrarn_w_d, "V8SiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsrlrni_b_h, "V32cV32cV32cIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrlrni_h_w, "V16sV16sV16sIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrlrni_w_d, "V8iV8iV8iIUi", 
"nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrlrni_d_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsrarni_b_h, "V32cV32cV32cIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrarni_h_w, "V16sV16sV16sIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrarni_w_d, "V8iV8iV8iIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsrarni_d_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvssrln_b_h, "V32ScV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrln_h_w, "V16sV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrln_w_d, "V8SiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvssran_b_h, "V32ScV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssran_h_w, "V16sV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssran_w_d, "V8SiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvssrln_bu_h, "V32UcV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrln_hu_w, "V16UsV8UiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrln_wu_d, "V8UiV4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvssran_bu_h, "V32UcV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssran_hu_w, "V16UsV8UiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssran_wu_d, "V8UiV4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvssrlni_b_h, "V32cV32cV32cIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrlni_h_w, "V16sV16sV16sIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrlni_w_d, "V8iV8iV8iIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrlni_d_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvssrani_b_h, "V32cV32cV32cIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrani_h_w, "V16sV16sV16sIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrani_w_d, "V8iV8iV8iIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrani_d_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvssrlrni_bu_h, "V32cV32cV32cIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrlrni_hu_w, "V16sV16sV16sIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrlrni_wu_d, "V8iV8iV8iIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrlrni_du_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvssrani_bu_h, "V32cV32cV32cIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrani_hu_w, "V16sV16sV16sIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrani_wu_d, "V8iV8iV8iIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrani_du_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvssrlrn_b_h, "V32ScV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrlrn_h_w, "V16sV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrlrn_w_d, "V8SiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvssrarn_b_h, "V32ScV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrarn_h_w, "V16sV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrarn_w_d, "V8SiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvssrlrn_bu_h, "V32UcV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrlrn_hu_w, "V16UsV8UiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrlrn_wu_d, "V8UiV4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvssrarn_bu_h, "V32UcV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrarn_hu_w, "V16UsV8UiV8Ui", "nc", "lasx") 
++TARGET_BUILTIN(__builtin_lasx_xvssrarn_wu_d, "V8UiV4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvssrlrni_b_h, "V32cV32cV32cIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrlrni_h_w, "V16sV16sV16sIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrlrni_w_d, "V8iV8iV8iIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrlrni_d_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvssrarni_b_h, "V32cV32cV32cIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrarni_h_w, "V16sV16sV16sIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrarni_w_d, "V8iV8iV8iIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrarni_d_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvssrlni_bu_h, "V32cV32cV32cIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrlni_hu_w, "V16sV16sV16sIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrlni_wu_d, "V8iV8iV8iIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrlni_du_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvssrarni_bu_h, "V32cV32cV32cIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrarni_hu_w, "V16sV16sV16sIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrarni_wu_d, "V8iV8iV8iIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvssrarni_du_q, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvclo_b, "V32ScV32Sc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvclo_h, "V16SsV16Ss", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvclo_w, "V8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvclo_d, "V4SLLiV4SLLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvclz_b, "V32ScV32Sc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvclz_h, "V16SsV16Ss", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvclz_w, "V8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvclz_d, "V4SLLiV4SLLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvpcnt_b, "V32ScV32Sc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvpcnt_h, "V16SsV16Ss", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvpcnt_w, "V8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvpcnt_d, "V4SLLiV4SLLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvbitclr_b, "V32UcV32UcV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvbitclr_h, "V16UsV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvbitclr_w, "V8UiV8UiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvbitclr_d, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvbitclri_b, "V32UcV32UcIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvbitclri_h, "V16UsV16UsIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvbitclri_w, "V8UiV8UiIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvbitclri_d, "V4ULLiV4ULLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvbitset_b, "V32UcV32UcV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvbitset_h, "V16UsV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvbitset_w, "V8UiV8UiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvbitset_d, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvbitseti_b, "V32UcV32UcIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvbitseti_h, "V16UsV16UsIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvbitseti_w, "V8UiV8UiIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvbitseti_d, "V4ULLiV4ULLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvbitrev_b, "V32UcV32UcV32Uc", "nc", "lasx") 
++TARGET_BUILTIN(__builtin_lasx_xvbitrev_h, "V16UsV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvbitrev_w, "V8UiV8UiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvbitrev_d, "V4ULLiV4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvbitrevi_b, "V32UcV32UcIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvbitrevi_h, "V16UsV16UsIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvbitrevi_w, "V8UiV8UiIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvbitrevi_d, "V4ULLiV4ULLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfrstp_b, "V32ScV32ScV32ScV32Sc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfrstp_h, "V16SsV16SsV16SsV16Ss", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfrstpi_b, "V32cV32cV32cIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfrstpi_h, "V16sV16sV16sIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfadd_s, "V8fV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfadd_d, "V4dV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfsub_s, "V8fV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfsub_d, "V4dV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfmul_s, "V8fV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfmul_d, "V4dV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfdiv_s, "V8fV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfdiv_d, "V4dV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfmadd_s, "V8fV8fV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfmadd_d, "V4dV4dV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfmsub_s, "V8fV8fV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfmsub_d, "V4dV4dV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfnmadd_s, "V8fV8fV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfnmadd_d, "V4dV4dV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfnmsub_s, "V8fV8fV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfnmsub_d, "V4dV4dV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfmax_s, "V8fV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfmax_d, "V4dV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfmin_s, "V8fV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfmin_d, "V4dV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfmaxa_s, "V8fV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfmaxa_d, "V4dV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfmina_s, "V8fV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfmina_d, "V4dV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvflogb_s, "V8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvflogb_d, "V4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfclass_s, "V8iV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfclass_d, "V4LLiV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfsqrt_s, "V8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfsqrt_d, "V4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfrecip_s, "V8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfrecip_d, "V4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfrsqrt_s, "V8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfrsqrt_d, "V4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfcvtl_s_h, "V8fV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfcvth_s_h, "V8fV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfcvtl_d_s, "V4dV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfcvth_d_s, "V4dV8f", 
"nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfcvt_h_s, "V16sV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfcvt_s_d, "V8fV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfrintrne_s, "V8SiV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfrintrne_d, "V4LLiV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfrintrz_s, "V8SiV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfrintrz_d, "V4LLiV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfrintrp_s, "V8SiV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfrintrp_d, "V4LLiV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfrintrm_s, "V8SiV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfrintrm_d, "V4LLiV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfrint_s, "V8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfrint_d, "V4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvftintrne_w_s, "V8SiV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvftintrne_l_d, "V4LLiV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvftintrz_w_s, "V8SiV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvftintrz_l_d, "V4LLiV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvftintrp_w_s, "V8SiV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvftintrp_l_d, "V4LLiV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvftintrm_w_s, "V8SiV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvftintrm_l_d, "V4LLiV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvftint_w_s, "V8SiV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvftint_l_d, "V4SLLiV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvftintrz_wu_s, "V8UiV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvftintrz_lu_d, "V4ULLiV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvftint_wu_s, "V8UiV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvftint_lu_d, "V4ULLiV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvftintrne_w_d, "V8SiV4dV4d", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvftintrz_w_d, "V8SiV4dV4d", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvftintrp_w_d, "V8SiV4dV4d", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvftintrm_w_d, "V8SiV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvftint_w_d, "V8SiV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvftintrnel_l_s, "V4LLiV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvftintrneh_l_s, "V4LLiV8f", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvftintrzl_l_s, "V4LLiV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvftintrzh_l_s, "V4LLiV8f", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvftintrpl_l_s, "V4LLiV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvftintrph_l_s, "V4LLiV8f", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvftintrml_l_s, "V4LLiV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvftintrmh_l_s, "V4LLiV8f", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvftintl_l_s, "V4LLiV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvftinth_l_s, "V4LLiV8f", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvffint_s_w, "V8fV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvffint_d_l, "V4dV4SLLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvffint_s_wu, "V8fV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvffint_d_lu, "V4dV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvffintl_d_w, "V4dV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvffinth_d_w, "V4dV8Si", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvffint_s_l, "V8fV4LLiV4LLi", "nc", 
"lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvseq_b, "V32ScV32ScV32Sc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvseq_h, "V16SsV16SsV16Ss", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvseq_w, "V8SiV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvseq_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvseqi_b, "V32ScV32ScISi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvseqi_h, "V16SsV16SsISi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvseqi_w, "V8SiV8SiISi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvseqi_d, "V4SLLiV4SLLiISi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsle_b, "V32ScV32ScV32Sc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsle_h, "V16SsV16SsV16Ss", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsle_w, "V8SiV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsle_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvslei_b, "V32ScV32ScISi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvslei_h, "V16SsV16SsISi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvslei_w, "V8SiV8SiISi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvslei_d, "V4SLLiV4SLLiISi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvsle_bu, "V32ScV32UcV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsle_hu, "V16SsV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsle_wu, "V8SiV8UiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvsle_du, "V4SLLiV4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvslei_bu, "V32ScV32UcIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvslei_hu, "V16SsV16UsIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvslei_wu, "V8SiV8UiIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvslei_du, "V4SLLiV4ULLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvslt_b, "V32ScV32ScV32Sc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvslt_h, "V16SsV16SsV16Ss", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvslt_w, "V8SiV8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvslt_d, "V4SLLiV4SLLiV4SLLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvslti_b, "V32ScV32ScISi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvslti_h, "V16SsV16SsISi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvslti_w, "V8SiV8SiISi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvslti_d, "V4SLLiV4SLLiISi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvslt_bu, "V32ScV32UcV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvslt_hu, "V16SsV16UsV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvslt_wu, "V8SiV8UiV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvslt_du, "V4SLLiV4ULLiV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvslti_bu, "V32ScV32UcIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvslti_hu, "V16SsV16UsIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvslti_wu, "V8SiV8UiIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvslti_du, "V4SLLiV4ULLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_caf_s, "V8SiV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_caf_d, "V4SLLiV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_cun_s, "V8SiV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_cun_d, "V4SLLiV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_ceq_s, "V8SiV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_ceq_d, "V4SLLiV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_cueq_s, "V8SiV8fV8f", "nc", "lasx") 
++TARGET_BUILTIN(__builtin_lasx_xvfcmp_cueq_d, "V4SLLiV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_clt_s, "V8SiV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_clt_d, "V4SLLiV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_cult_s, "V8SiV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_cult_d, "V4SLLiV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_cle_s, "V8SiV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_cle_d, "V4SLLiV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_cule_s, "V8SiV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_cule_d, "V4SLLiV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_cne_s, "V8SiV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_cne_d, "V4SLLiV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_cor_s, "V8SiV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_cor_d, "V4SLLiV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_cune_s, "V8SiV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_cune_d, "V4SLLiV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_saf_s, "V8SiV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_saf_d, "V4SLLiV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_sun_s, "V8SiV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_sun_d, "V4SLLiV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_seq_s, "V8SiV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_seq_d, "V4SLLiV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_sueq_s, "V8SiV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_sueq_d, "V4SLLiV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_slt_s, "V8SiV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_slt_d, "V4SLLiV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_sult_s, "V8SiV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_sult_d, "V4SLLiV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_sle_s, "V8SiV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_sle_d, "V4SLLiV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_sule_s, "V8SiV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_sule_d, "V4SLLiV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_sne_s, "V8SiV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_sne_d, "V4SLLiV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_sor_s, "V8SiV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_sor_d, "V4SLLiV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_sune_s, "V8SiV8fV8f", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvfcmp_sune_d, "V4SLLiV4dV4d", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvbitsel_v, "V32UcV32UcV32UcV32Uc", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvbitseli_b, "V32UcV32UcV32UcIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvreplgr2vr_b, "V32Sci", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvreplgr2vr_h, "V16Ssi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvreplgr2vr_w, "V8Sii", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvreplgr2vr_d, "V4SLLiLLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvinsgr2vr_w, "V8SiV8SiiIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvinsgr2vr_d, "V4SLLiV4SLLiLLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvpickve2gr_w, "iV8SiIUi", "nc", "lasx") 
++TARGET_BUILTIN(__builtin_lasx_xvpickve2gr_d, "LLiV4SLLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvpickve2gr_wu, "iV8UiIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvpickve2gr_du, "LLiV4ULLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvreplve_b, "V32cV32cUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvreplve_h, "V16sV16sUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvreplve_w, "V8iV8iUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvreplve_d, "V4LLiV4LLiUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvrepl128vei_b, "V32cV32cIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvrepl128vei_h, "V16sV16sIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvrepl128vei_w, "V8iV8iIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvrepl128vei_d, "V4LLiV4LLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvreplve0_b, "V32ScV32Sc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvreplve0_h, "V16SsV16Ss", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvreplve0_w, "V8SiV8Si", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvreplve0_d, "V4SLLiV4SLLi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvreplve0_q, "V32ScV32Sc", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvinsve0_w, "V8iV8iV8iIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvinsve0_d, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvpickve_w, "V8iV8iIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvpickve_d, "V4LLiV4LLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvpickve_w_f, "V8fV8fIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvpickve_d_f, "V4dV4dIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvbsll_v, "V32cV32cIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvbsrl_v, "V32cV32cIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvpackev_b, "V32cV32cV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvpackev_h, "V16sV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvpackev_w, "V8iV8iV8i", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvpackev_d, "V4LLiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvpackod_b, "V32cV32cV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvpackod_h, "V16sV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvpackod_w, "V8iV8iV8i", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvpackod_d, "V4LLiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvpickev_b, "V32cV32cV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvpickev_h, "V16sV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvpickev_w, "V8iV8iV8i", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvpickev_d, "V4LLiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvpickod_b, "V32cV32cV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvpickod_h, "V16sV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvpickod_w, "V8iV8iV8i", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvpickod_d, "V4LLiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvilvl_b, "V32cV32cV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvilvl_h, "V16sV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvilvl_w, "V8iV8iV8i", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvilvl_d, "V4LLiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvilvh_b, "V32cV32cV32c", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvilvh_h, "V16sV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvilvh_w, "V8iV8iV8i", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvilvh_d, 
"V4LLiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvshuf_b, "V32UcV32UcV32UcV32Uc", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvshuf_h, "V16sV16sV16sV16s", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvshuf_w, "V8iV8iV8iV8i", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvshuf_d, "V4LLiV4LLiV4LLiV4LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvperm_w, "V8iV8iV8i", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvshuf4i_b, "V32cV32cIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvshuf4i_h, "V16sV16sIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvshuf4i_w, "V8iV8iIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvshuf4i_d, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvpermi_w, "V8iV8iV8iIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvpermi_d, "V4LLiV4LLiIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvpermi_q, "V32cV32cV32cIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvextrins_b, "V32cV32cV32cIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvextrins_h, "V16sV16sV16sIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvextrins_w, "V8iV8iV8iIUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvextrins_d, "V4LLiV4LLiV4LLiIUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvld, "V32ScvC*Ii", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvst, "vV32Scv*Ii", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvldx, "V32ScvC*LLi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvstx, "vV32Scv*LLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvldrepl_b, "V32cvC*Ii", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvldrepl_h, "V16svC*Ii", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvldrepl_w, "V8ivC*Ii", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvldrepl_d, "V4LLivC*Ii", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xvstelm_b, "vV32Scv*IiUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvstelm_h, "vV16Ssv*IiUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvstelm_w, "vV8Siv*IiUi", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xvstelm_d, "vV4SLLiv*IiUi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xbz_v, "iV32Uc", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xbnz_v, "iV32Uc", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xbz_b, "iV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xbz_h, "iV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xbz_w, "iV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xbz_d, "iV4ULLi", "nc", "lasx") ++ ++TARGET_BUILTIN(__builtin_lasx_xbnz_b, "iV32Uc", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xbnz_h, "iV16Us", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xbnz_w, "iV8Ui", "nc", "lasx") ++TARGET_BUILTIN(__builtin_lasx_xbnz_d, "iV4ULLi", "nc", "lasx") +diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt +index 1d5573b71e6d..356009ae9157 100644 +--- a/clang/lib/Headers/CMakeLists.txt ++++ b/clang/lib/Headers/CMakeLists.txt +@@ -78,6 +78,7 @@ set(hlsl_files + + set(loongarch_files + larchintrin.h ++ lasxintrin.h + lsxintrin.h + ) + +diff --git a/clang/lib/Headers/lasxintrin.h b/clang/lib/Headers/lasxintrin.h +new file mode 100644 +index 000000000000..6b4d5012a24b +--- /dev/null ++++ b/clang/lib/Headers/lasxintrin.h +@@ -0,0 +1,3860 @@ ++/*===------------ lasxintrin.h - LoongArch LASX intrinsics -----------------=== ++ * ++ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. ++ * See https://llvm.org/LICENSE.txt for license information. 
++ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception ++ * ++ *===-----------------------------------------------------------------------=== ++ */ ++ ++#ifndef _LOONGSON_ASXINTRIN_H ++#define _LOONGSON_ASXINTRIN_H 1 ++ ++#if defined(__loongarch_asx) ++ ++typedef signed char v32i8 __attribute__((vector_size(32), aligned(32))); ++typedef signed char v32i8_b __attribute__((vector_size(32), aligned(1))); ++typedef unsigned char v32u8 __attribute__((vector_size(32), aligned(32))); ++typedef unsigned char v32u8_b __attribute__((vector_size(32), aligned(1))); ++typedef short v16i16 __attribute__((vector_size(32), aligned(32))); ++typedef short v16i16_h __attribute__((vector_size(32), aligned(2))); ++typedef unsigned short v16u16 __attribute__((vector_size(32), aligned(32))); ++typedef unsigned short v16u16_h __attribute__((vector_size(32), aligned(2))); ++typedef int v8i32 __attribute__((vector_size(32), aligned(32))); ++typedef int v8i32_w __attribute__((vector_size(32), aligned(4))); ++typedef unsigned int v8u32 __attribute__((vector_size(32), aligned(32))); ++typedef unsigned int v8u32_w __attribute__((vector_size(32), aligned(4))); ++typedef long long v4i64 __attribute__((vector_size(32), aligned(32))); ++typedef long long v4i64_d __attribute__((vector_size(32), aligned(8))); ++typedef unsigned long long v4u64 __attribute__((vector_size(32), aligned(32))); ++typedef unsigned long long v4u64_d __attribute__((vector_size(32), aligned(8))); ++typedef float v8f32 __attribute__((vector_size(32), aligned(32))); ++typedef float v8f32_w __attribute__((vector_size(32), aligned(4))); ++typedef double v4f64 __attribute__((vector_size(32), aligned(32))); ++typedef double v4f64_d __attribute__((vector_size(32), aligned(8))); ++ ++typedef double v4f64 __attribute__((vector_size(32), aligned(32))); ++typedef double v4f64_d __attribute__((vector_size(32), aligned(8))); ++ ++typedef float __m256 __attribute__((__vector_size__(32), __may_alias__)); ++typedef long long __m256i __attribute__((__vector_size__(32), __may_alias__)); ++typedef double __m256d __attribute__((__vector_size__(32), __may_alias__)); ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsll_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsll_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsll_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsll_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsll_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsll_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsll_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsll_d((v4i64)_1, (v4i64)_2); ++} ++ ++#define __lasx_xvslli_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslli_b((v32i8)(_1), (_2))) ++ ++#define __lasx_xvslli_h(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslli_h((v16i16)(_1), (_2))) ++ ++#define __lasx_xvslli_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslli_w((v8i32)(_1), (_2))) ++ ++#define __lasx_xvslli_d(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslli_d((v4i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ 
__lasx_xvsra_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsra_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsra_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsra_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsra_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsra_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsra_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsra_d((v4i64)_1, (v4i64)_2); ++} ++ ++#define __lasx_xvsrai_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrai_b((v32i8)(_1), (_2))) ++ ++#define __lasx_xvsrai_h(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrai_h((v16i16)(_1), (_2))) ++ ++#define __lasx_xvsrai_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrai_w((v8i32)(_1), (_2))) ++ ++#define __lasx_xvsrai_d(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrai_d((v4i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrar_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrar_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrar_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrar_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrar_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrar_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrar_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrar_d((v4i64)_1, (v4i64)_2); ++} ++ ++#define __lasx_xvsrari_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrari_b((v32i8)(_1), (_2))) ++ ++#define __lasx_xvsrari_h(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrari_h((v16i16)(_1), (_2))) ++ ++#define __lasx_xvsrari_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrari_w((v8i32)(_1), (_2))) ++ ++#define __lasx_xvsrari_d(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrari_d((v4i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrl_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrl_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrl_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrl_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrl_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrl_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrl_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrl_d((v4i64)_1, (v4i64)_2); ++} ++ ++#define __lasx_xvsrli_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrli_b((v32i8)(_1), (_2))) ++ ++#define __lasx_xvsrli_h(/*__m256i*/ _1, 
/*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrli_h((v16i16)(_1), (_2))) ++ ++#define __lasx_xvsrli_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrli_w((v8i32)(_1), (_2))) ++ ++#define __lasx_xvsrli_d(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrli_d((v4i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrlr_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrlr_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrlr_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrlr_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrlr_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrlr_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrlr_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrlr_d((v4i64)_1, (v4i64)_2); ++} ++ ++#define __lasx_xvsrlri_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrlri_b((v32i8)(_1), (_2))) ++ ++#define __lasx_xvsrlri_h(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrlri_h((v16i16)(_1), (_2))) ++ ++#define __lasx_xvsrlri_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrlri_w((v8i32)(_1), (_2))) ++ ++#define __lasx_xvsrlri_d(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsrlri_d((v4i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitclr_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitclr_b((v32u8)_1, (v32u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitclr_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitclr_h((v16u16)_1, (v16u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitclr_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitclr_w((v8u32)_1, (v8u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitclr_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitclr_d((v4u64)_1, (v4u64)_2); ++} ++ ++#define __lasx_xvbitclri_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitclri_b((v32u8)(_1), (_2))) ++ ++#define __lasx_xvbitclri_h(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitclri_h((v16u16)(_1), (_2))) ++ ++#define __lasx_xvbitclri_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitclri_w((v8u32)(_1), (_2))) ++ ++#define __lasx_xvbitclri_d(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitclri_d((v4u64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitset_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitset_b((v32u8)_1, (v32u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitset_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitset_h((v16u16)_1, (v16u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ 
__lasx_xvbitset_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitset_w((v8u32)_1, (v8u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitset_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitset_d((v4u64)_1, (v4u64)_2); ++} ++ ++#define __lasx_xvbitseti_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitseti_b((v32u8)(_1), (_2))) ++ ++#define __lasx_xvbitseti_h(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitseti_h((v16u16)(_1), (_2))) ++ ++#define __lasx_xvbitseti_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitseti_w((v8u32)(_1), (_2))) ++ ++#define __lasx_xvbitseti_d(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitseti_d((v4u64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitrev_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitrev_b((v32u8)_1, (v32u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitrev_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitrev_h((v16u16)_1, (v16u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitrev_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitrev_w((v8u32)_1, (v8u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitrev_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvbitrev_d((v4u64)_1, (v4u64)_2); ++} ++ ++#define __lasx_xvbitrevi_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitrevi_b((v32u8)(_1), (_2))) ++ ++#define __lasx_xvbitrevi_h(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitrevi_h((v16u16)(_1), (_2))) ++ ++#define __lasx_xvbitrevi_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitrevi_w((v8u32)(_1), (_2))) ++ ++#define __lasx_xvbitrevi_d(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbitrevi_d((v4u64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvadd_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvadd_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvadd_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvadd_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvadd_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvadd_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvadd_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvadd_d((v4i64)_1, (v4i64)_2); ++} ++ ++#define __lasx_xvaddi_bu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvaddi_bu((v32i8)(_1), (_2))) ++ ++#define __lasx_xvaddi_hu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvaddi_hu((v16i16)(_1), (_2))) ++ ++#define __lasx_xvaddi_wu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvaddi_wu((v8i32)(_1), (_2))) ++ ++#define __lasx_xvaddi_du(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvaddi_du((v4i64)(_1), (_2))) ++ ++extern __inline ++ 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsub_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsub_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsub_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsub_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsub_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsub_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsub_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsub_d((v4i64)_1, (v4i64)_2); ++} ++ ++#define __lasx_xvsubi_bu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsubi_bu((v32i8)(_1), (_2))) ++ ++#define __lasx_xvsubi_hu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsubi_hu((v16i16)(_1), (_2))) ++ ++#define __lasx_xvsubi_wu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsubi_wu((v8i32)(_1), (_2))) ++ ++#define __lasx_xvsubi_du(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsubi_du((v4i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmax_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmax_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmax_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmax_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmax_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmax_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmax_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmax_d((v4i64)_1, (v4i64)_2); ++} ++ ++#define __lasx_xvmaxi_b(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmaxi_b((v32i8)(_1), (_2))) ++ ++#define __lasx_xvmaxi_h(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmaxi_h((v16i16)(_1), (_2))) ++ ++#define __lasx_xvmaxi_w(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmaxi_w((v8i32)(_1), (_2))) ++ ++#define __lasx_xvmaxi_d(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmaxi_d((v4i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmax_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmax_bu((v32u8)_1, (v32u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmax_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmax_hu((v16u16)_1, (v16u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmax_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmax_wu((v8u32)_1, (v8u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmax_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmax_du((v4u64)_1, (v4u64)_2); ++} ++ ++#define __lasx_xvmaxi_bu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ 
((__m256i)__builtin_lasx_xvmaxi_bu((v32u8)(_1), (_2))) ++ ++#define __lasx_xvmaxi_hu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmaxi_hu((v16u16)(_1), (_2))) ++ ++#define __lasx_xvmaxi_wu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmaxi_wu((v8u32)(_1), (_2))) ++ ++#define __lasx_xvmaxi_du(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmaxi_du((v4u64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmin_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmin_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmin_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmin_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmin_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmin_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmin_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmin_d((v4i64)_1, (v4i64)_2); ++} ++ ++#define __lasx_xvmini_b(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmini_b((v32i8)(_1), (_2))) ++ ++#define __lasx_xvmini_h(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmini_h((v16i16)(_1), (_2))) ++ ++#define __lasx_xvmini_w(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmini_w((v8i32)(_1), (_2))) ++ ++#define __lasx_xvmini_d(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmini_d((v4i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmin_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmin_bu((v32u8)_1, (v32u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmin_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmin_hu((v16u16)_1, (v16u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmin_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmin_wu((v8u32)_1, (v8u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmin_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmin_du((v4u64)_1, (v4u64)_2); ++} ++ ++#define __lasx_xvmini_bu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmini_bu((v32u8)(_1), (_2))) ++ ++#define __lasx_xvmini_hu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmini_hu((v16u16)(_1), (_2))) ++ ++#define __lasx_xvmini_wu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmini_wu((v8u32)(_1), (_2))) ++ ++#define __lasx_xvmini_du(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvmini_du((v4u64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvseq_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvseq_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvseq_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvseq_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, 
__always_inline__, __artificial__)) __m256i ++ __lasx_xvseq_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvseq_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvseq_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvseq_d((v4i64)_1, (v4i64)_2); ++} ++ ++#define __lasx_xvseqi_b(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvseqi_b((v32i8)(_1), (_2))) ++ ++#define __lasx_xvseqi_h(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvseqi_h((v16i16)(_1), (_2))) ++ ++#define __lasx_xvseqi_w(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvseqi_w((v8i32)(_1), (_2))) ++ ++#define __lasx_xvseqi_d(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvseqi_d((v4i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvslt_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvslt_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvslt_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvslt_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvslt_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvslt_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvslt_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvslt_d((v4i64)_1, (v4i64)_2); ++} ++ ++#define __lasx_xvslti_b(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslti_b((v32i8)(_1), (_2))) ++ ++#define __lasx_xvslti_h(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslti_h((v16i16)(_1), (_2))) ++ ++#define __lasx_xvslti_w(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslti_w((v8i32)(_1), (_2))) ++ ++#define __lasx_xvslti_d(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslti_d((v4i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvslt_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvslt_bu((v32u8)_1, (v32u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvslt_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvslt_hu((v16u16)_1, (v16u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvslt_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvslt_wu((v8u32)_1, (v8u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvslt_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvslt_du((v4u64)_1, (v4u64)_2); ++} ++ ++#define __lasx_xvslti_bu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslti_bu((v32u8)(_1), (_2))) ++ ++#define __lasx_xvslti_hu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslti_hu((v16u16)(_1), (_2))) ++ ++#define __lasx_xvslti_wu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslti_wu((v8u32)(_1), (_2))) ++ ++#define __lasx_xvslti_du(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslti_du((v4u64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, 
__artificial__)) __m256i ++ __lasx_xvsle_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsle_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsle_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsle_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsle_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsle_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsle_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsle_d((v4i64)_1, (v4i64)_2); ++} ++ ++#define __lasx_xvslei_b(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslei_b((v32i8)(_1), (_2))) ++ ++#define __lasx_xvslei_h(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslei_h((v16i16)(_1), (_2))) ++ ++#define __lasx_xvslei_w(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslei_w((v8i32)(_1), (_2))) ++ ++#define __lasx_xvslei_d(/*__m256i*/ _1, /*si5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslei_d((v4i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsle_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsle_bu((v32u8)_1, (v32u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsle_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsle_hu((v16u16)_1, (v16u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsle_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsle_wu((v8u32)_1, (v8u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsle_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsle_du((v4u64)_1, (v4u64)_2); ++} ++ ++#define __lasx_xvslei_bu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslei_bu((v32u8)(_1), (_2))) ++ ++#define __lasx_xvslei_hu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslei_hu((v16u16)(_1), (_2))) ++ ++#define __lasx_xvslei_wu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslei_wu((v8u32)(_1), (_2))) ++ ++#define __lasx_xvslei_du(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvslei_du((v4u64)(_1), (_2))) ++ ++#define __lasx_xvsat_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsat_b((v32i8)(_1), (_2))) ++ ++#define __lasx_xvsat_h(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsat_h((v16i16)(_1), (_2))) ++ ++#define __lasx_xvsat_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsat_w((v8i32)(_1), (_2))) ++ ++#define __lasx_xvsat_d(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsat_d((v4i64)(_1), (_2))) ++ ++#define __lasx_xvsat_bu(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsat_bu((v32u8)(_1), (_2))) ++ ++#define __lasx_xvsat_hu(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsat_hu((v16u16)(_1), (_2))) ++ ++#define __lasx_xvsat_wu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsat_wu((v8u32)(_1), (_2))) ++ ++#define __lasx_xvsat_du(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsat_du((v4u64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, 
__always_inline__, __artificial__)) __m256i ++ __lasx_xvadda_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvadda_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvadda_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvadda_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvadda_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvadda_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvadda_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvadda_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsadd_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsadd_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsadd_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsadd_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsadd_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsadd_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsadd_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsadd_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsadd_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsadd_bu((v32u8)_1, (v32u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsadd_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsadd_hu((v16u16)_1, (v16u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsadd_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsadd_wu((v8u32)_1, (v8u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsadd_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsadd_du((v4u64)_1, (v4u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavg_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavg_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavg_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavg_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavg_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavg_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavg_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavg_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavg_bu(__m256i _1, __m256i _2) { ++ return 
(__m256i)__builtin_lasx_xvavg_bu((v32u8)_1, (v32u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavg_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavg_hu((v16u16)_1, (v16u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavg_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavg_wu((v8u32)_1, (v8u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavg_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavg_du((v4u64)_1, (v4u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavgr_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavgr_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavgr_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavgr_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavgr_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavgr_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavgr_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavgr_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavgr_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavgr_bu((v32u8)_1, (v32u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavgr_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavgr_hu((v16u16)_1, (v16u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavgr_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavgr_wu((v8u32)_1, (v8u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvavgr_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvavgr_du((v4u64)_1, (v4u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssub_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssub_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssub_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssub_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssub_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssub_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssub_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssub_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssub_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssub_bu((v32u8)_1, (v32u8)_2); ++} ++ ++extern __inline ++ 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssub_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssub_hu((v16u16)_1, (v16u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssub_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssub_wu((v8u32)_1, (v8u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssub_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssub_du((v4u64)_1, (v4u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvabsd_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvabsd_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvabsd_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvabsd_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvabsd_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvabsd_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvabsd_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvabsd_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvabsd_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvabsd_bu((v32u8)_1, (v32u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvabsd_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvabsd_hu((v16u16)_1, (v16u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvabsd_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvabsd_wu((v8u32)_1, (v8u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvabsd_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvabsd_du((v4u64)_1, (v4u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmul_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmul_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmul_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmul_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmul_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmul_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmul_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmul_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmadd_b(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmadd_b((v32i8)_1, (v32i8)_2, (v32i8)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ 
__lasx_xvmadd_h(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmadd_h((v16i16)_1, (v16i16)_2, (v16i16)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmadd_w(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmadd_w((v8i32)_1, (v8i32)_2, (v8i32)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmadd_d(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmadd_d((v4i64)_1, (v4i64)_2, (v4i64)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmsub_b(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmsub_b((v32i8)_1, (v32i8)_2, (v32i8)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmsub_h(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmsub_h((v16i16)_1, (v16i16)_2, (v16i16)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmsub_w(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmsub_w((v8i32)_1, (v8i32)_2, (v8i32)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmsub_d(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmsub_d((v4i64)_1, (v4i64)_2, (v4i64)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvdiv_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvdiv_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvdiv_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvdiv_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvdiv_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvdiv_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvdiv_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvdiv_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvdiv_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvdiv_bu((v32u8)_1, (v32u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvdiv_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvdiv_hu((v16u16)_1, (v16u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvdiv_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvdiv_wu((v8u32)_1, (v8u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvdiv_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvdiv_du((v4u64)_1, (v4u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhaddw_h_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhaddw_h_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, 
__always_inline__, __artificial__)) __m256i ++ __lasx_xvhaddw_w_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhaddw_w_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhaddw_d_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhaddw_d_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhaddw_hu_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhaddw_hu_bu((v32u8)_1, (v32u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhaddw_wu_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhaddw_wu_hu((v16u16)_1, (v16u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhaddw_du_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhaddw_du_wu((v8u32)_1, (v8u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhsubw_h_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhsubw_h_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhsubw_w_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhsubw_w_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhsubw_d_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhsubw_d_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhsubw_hu_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhsubw_hu_bu((v32u8)_1, (v32u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhsubw_wu_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhsubw_wu_hu((v16u16)_1, (v16u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhsubw_du_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhsubw_du_wu((v8u32)_1, (v8u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmod_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmod_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmod_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmod_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmod_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmod_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmod_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmod_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmod_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmod_bu((v32u8)_1, (v32u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 
__m256i ++ __lasx_xvmod_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmod_hu((v16u16)_1, (v16u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmod_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmod_wu((v8u32)_1, (v8u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmod_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmod_du((v4u64)_1, (v4u64)_2); ++} ++ ++#define __lasx_xvrepl128vei_b(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvrepl128vei_b((v32i8)(_1), (_2))) ++ ++#define __lasx_xvrepl128vei_h(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvrepl128vei_h((v16i16)(_1), (_2))) ++ ++#define __lasx_xvrepl128vei_w(/*__m256i*/ _1, /*ui2*/ _2) \ ++ ((__m256i)__builtin_lasx_xvrepl128vei_w((v8i32)(_1), (_2))) ++ ++#define __lasx_xvrepl128vei_d(/*__m256i*/ _1, /*ui1*/ _2) \ ++ ((__m256i)__builtin_lasx_xvrepl128vei_d((v4i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpickev_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpickev_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpickev_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpickev_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpickev_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpickev_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpickev_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpickev_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpickod_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpickod_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpickod_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpickod_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpickod_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpickod_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpickod_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpickod_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvilvh_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvilvh_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvilvh_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvilvh_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvilvh_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvilvh_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) 
__m256i ++ __lasx_xvilvh_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvilvh_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvilvl_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvilvl_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvilvl_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvilvl_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvilvl_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvilvl_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvilvl_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvilvl_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpackev_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpackev_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpackev_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpackev_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpackev_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpackev_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpackev_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpackev_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpackod_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpackod_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpackod_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpackod_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpackod_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpackod_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpackod_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvpackod_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvshuf_b(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvshuf_b((v32i8)_1, (v32i8)_2, (v32i8)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvshuf_h(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvshuf_h((v16i16)_1, (v16i16)_2, (v16i16)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvshuf_w(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvshuf_w((v8i32)_1, (v8i32)_2, (v8i32)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ 
__lasx_xvshuf_d(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvshuf_d((v4i64)_1, (v4i64)_2, (v4i64)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvand_v(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvand_v((v32u8)_1, (v32u8)_2); ++} ++ ++#define __lasx_xvandi_b(/*__m256i*/ _1, /*ui8*/ _2) \ ++ ((__m256i)__builtin_lasx_xvandi_b((v32u8)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvor_v(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvor_v((v32u8)_1, (v32u8)_2); ++} ++ ++#define __lasx_xvori_b(/*__m256i*/ _1, /*ui8*/ _2) \ ++ ((__m256i)__builtin_lasx_xvori_b((v32u8)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvnor_v(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvnor_v((v32u8)_1, (v32u8)_2); ++} ++ ++#define __lasx_xvnori_b(/*__m256i*/ _1, /*ui8*/ _2) \ ++ ((__m256i)__builtin_lasx_xvnori_b((v32u8)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvxor_v(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvxor_v((v32u8)_1, (v32u8)_2); ++} ++ ++#define __lasx_xvxori_b(/*__m256i*/ _1, /*ui8*/ _2) \ ++ ((__m256i)__builtin_lasx_xvxori_b((v32u8)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvbitsel_v(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvbitsel_v((v32u8)_1, (v32u8)_2, (v32u8)_3); ++} ++ ++#define __lasx_xvbitseli_b(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ ++ ((__m256i)__builtin_lasx_xvbitseli_b((v32u8)(_1), (v32u8)(_2), (_3))) ++ ++#define __lasx_xvshuf4i_b(/*__m256i*/ _1, /*ui8*/ _2) \ ++ ((__m256i)__builtin_lasx_xvshuf4i_b((v32i8)(_1), (_2))) ++ ++#define __lasx_xvshuf4i_h(/*__m256i*/ _1, /*ui8*/ _2) \ ++ ((__m256i)__builtin_lasx_xvshuf4i_h((v16i16)(_1), (_2))) ++ ++#define __lasx_xvshuf4i_w(/*__m256i*/ _1, /*ui8*/ _2) \ ++ ((__m256i)__builtin_lasx_xvshuf4i_w((v8i32)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplgr2vr_b(int _1) { ++ return (__m256i)__builtin_lasx_xvreplgr2vr_b((int)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplgr2vr_h(int _1) { ++ return (__m256i)__builtin_lasx_xvreplgr2vr_h((int)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplgr2vr_w(int _1) { ++ return (__m256i)__builtin_lasx_xvreplgr2vr_w((int)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplgr2vr_d(long int _1) { ++ return (__m256i)__builtin_lasx_xvreplgr2vr_d((long int)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpcnt_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvpcnt_b((v32i8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpcnt_h(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvpcnt_h((v16i16)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpcnt_w(__m256i _1) { ++ return 
(__m256i)__builtin_lasx_xvpcnt_w((v8i32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvpcnt_d(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvpcnt_d((v4i64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvclo_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvclo_b((v32i8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvclo_h(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvclo_h((v16i16)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvclo_w(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvclo_w((v8i32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvclo_d(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvclo_d((v4i64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvclz_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvclz_b((v32i8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvclz_h(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvclz_h((v16i16)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvclz_w(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvclz_w((v8i32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvclz_d(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvclz_d((v4i64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfadd_s(__m256 _1, __m256 _2) { ++ return (__m256)__builtin_lasx_xvfadd_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfadd_d(__m256d _1, __m256d _2) { ++ return (__m256d)__builtin_lasx_xvfadd_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfsub_s(__m256 _1, __m256 _2) { ++ return (__m256)__builtin_lasx_xvfsub_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfsub_d(__m256d _1, __m256d _2) { ++ return (__m256d)__builtin_lasx_xvfsub_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfmul_s(__m256 _1, __m256 _2) { ++ return (__m256)__builtin_lasx_xvfmul_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfmul_d(__m256d _1, __m256d _2) { ++ return (__m256d)__builtin_lasx_xvfmul_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfdiv_s(__m256 _1, __m256 _2) { ++ return (__m256)__builtin_lasx_xvfdiv_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfdiv_d(__m256d _1, __m256d _2) { ++ return (__m256d)__builtin_lasx_xvfdiv_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ 
__lasx_xvfcvt_h_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcvt_h_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfcvt_s_d(__m256d _1, __m256d _2) { ++ return (__m256)__builtin_lasx_xvfcvt_s_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfmin_s(__m256 _1, __m256 _2) { ++ return (__m256)__builtin_lasx_xvfmin_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfmin_d(__m256d _1, __m256d _2) { ++ return (__m256d)__builtin_lasx_xvfmin_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfmina_s(__m256 _1, __m256 _2) { ++ return (__m256)__builtin_lasx_xvfmina_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfmina_d(__m256d _1, __m256d _2) { ++ return (__m256d)__builtin_lasx_xvfmina_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfmax_s(__m256 _1, __m256 _2) { ++ return (__m256)__builtin_lasx_xvfmax_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfmax_d(__m256d _1, __m256d _2) { ++ return (__m256d)__builtin_lasx_xvfmax_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfmaxa_s(__m256 _1, __m256 _2) { ++ return (__m256)__builtin_lasx_xvfmaxa_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfmaxa_d(__m256d _1, __m256d _2) { ++ return (__m256d)__builtin_lasx_xvfmaxa_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfclass_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvfclass_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfclass_d(__m256d _1) { ++ return (__m256i)__builtin_lasx_xvfclass_d((v4f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfsqrt_s(__m256 _1) { ++ return (__m256)__builtin_lasx_xvfsqrt_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfsqrt_d(__m256d _1) { ++ return (__m256d)__builtin_lasx_xvfsqrt_d((v4f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfrecip_s(__m256 _1) { ++ return (__m256)__builtin_lasx_xvfrecip_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfrecip_d(__m256d _1) { ++ return (__m256d)__builtin_lasx_xvfrecip_d((v4f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfrint_s(__m256 _1) { ++ return (__m256)__builtin_lasx_xvfrint_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfrint_d(__m256d _1) { ++ return 
(__m256d)__builtin_lasx_xvfrint_d((v4f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfrsqrt_s(__m256 _1) { ++ return (__m256)__builtin_lasx_xvfrsqrt_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfrsqrt_d(__m256d _1) { ++ return (__m256d)__builtin_lasx_xvfrsqrt_d((v4f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvflogb_s(__m256 _1) { ++ return (__m256)__builtin_lasx_xvflogb_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvflogb_d(__m256d _1) { ++ return (__m256d)__builtin_lasx_xvflogb_d((v4f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfcvth_s_h(__m256i _1) { ++ return (__m256)__builtin_lasx_xvfcvth_s_h((v16i16)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfcvth_d_s(__m256 _1) { ++ return (__m256d)__builtin_lasx_xvfcvth_d_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfcvtl_s_h(__m256i _1) { ++ return (__m256)__builtin_lasx_xvfcvtl_s_h((v16i16)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfcvtl_d_s(__m256 _1) { ++ return (__m256d)__builtin_lasx_xvfcvtl_d_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftint_w_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftint_w_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftint_l_d(__m256d _1) { ++ return (__m256i)__builtin_lasx_xvftint_l_d((v4f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftint_wu_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftint_wu_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftint_lu_d(__m256d _1) { ++ return (__m256i)__builtin_lasx_xvftint_lu_d((v4f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrz_w_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrz_w_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrz_l_d(__m256d _1) { ++ return (__m256i)__builtin_lasx_xvftintrz_l_d((v4f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrz_wu_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrz_wu_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrz_lu_d(__m256d _1) { ++ return (__m256i)__builtin_lasx_xvftintrz_lu_d((v4f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvffint_s_w(__m256i _1) { ++ return (__m256)__builtin_lasx_xvffint_s_w((v8i32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvffint_d_l(__m256i _1) { ++ return 
(__m256d)__builtin_lasx_xvffint_d_l((v4i64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvffint_s_wu(__m256i _1) { ++ return (__m256)__builtin_lasx_xvffint_s_wu((v8u32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvffint_d_lu(__m256i _1) { ++ return (__m256d)__builtin_lasx_xvffint_d_lu((v4u64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplve_b(__m256i _1, int _2) { ++ return (__m256i)__builtin_lasx_xvreplve_b((v32i8)_1, (int)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplve_h(__m256i _1, int _2) { ++ return (__m256i)__builtin_lasx_xvreplve_h((v16i16)_1, (int)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplve_w(__m256i _1, int _2) { ++ return (__m256i)__builtin_lasx_xvreplve_w((v8i32)_1, (int)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplve_d(__m256i _1, int _2) { ++ return (__m256i)__builtin_lasx_xvreplve_d((v4i64)_1, (int)_2); ++} ++ ++#define __lasx_xvpermi_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ ++ ((__m256i)__builtin_lasx_xvpermi_w((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvandn_v(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvandn_v((v32u8)_1, (v32u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvneg_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvneg_b((v32i8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvneg_h(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvneg_h((v16i16)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvneg_w(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvneg_w((v8i32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvneg_d(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvneg_d((v4i64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmuh_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmuh_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmuh_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmuh_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmuh_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmuh_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmuh_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmuh_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmuh_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmuh_bu((v32u8)_1, (v32u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, 
__artificial__)) __m256i ++ __lasx_xvmuh_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmuh_hu((v16u16)_1, (v16u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmuh_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmuh_wu((v8u32)_1, (v8u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmuh_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmuh_du((v4u64)_1, (v4u64)_2); ++} ++ ++#define __lasx_xvsllwil_h_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsllwil_h_b((v32i8)(_1), (_2))) ++ ++#define __lasx_xvsllwil_w_h(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsllwil_w_h((v16i16)(_1), (_2))) ++ ++#define __lasx_xvsllwil_d_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsllwil_d_w((v8i32)(_1), (_2))) ++ ++#define __lasx_xvsllwil_hu_bu(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsllwil_hu_bu((v32u8)(_1), (_2))) ++ ++#define __lasx_xvsllwil_wu_hu(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsllwil_wu_hu((v16u16)(_1), (_2))) ++ ++#define __lasx_xvsllwil_du_wu(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvsllwil_du_wu((v8u32)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsran_b_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsran_b_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsran_h_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsran_h_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsran_w_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsran_w_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssran_b_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssran_b_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssran_h_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssran_h_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssran_w_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssran_w_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssran_bu_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssran_bu_h((v16u16)_1, (v16u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssran_hu_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssran_hu_w((v8u32)_1, (v8u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssran_wu_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssran_wu_d((v4u64)_1, (v4u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrarn_b_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrarn_b_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern 
__inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrarn_h_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrarn_h_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrarn_w_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrarn_w_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrarn_b_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrarn_b_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrarn_h_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrarn_h_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrarn_w_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrarn_w_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrarn_bu_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrarn_bu_h((v16u16)_1, (v16u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrarn_hu_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrarn_hu_w((v8u32)_1, (v8u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrarn_wu_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrarn_wu_d((v4u64)_1, (v4u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrln_b_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrln_b_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrln_h_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrln_h_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrln_w_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrln_w_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrln_bu_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrln_bu_h((v16u16)_1, (v16u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrln_hu_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrln_hu_w((v8u32)_1, (v8u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrln_wu_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrln_wu_d((v4u64)_1, (v4u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrlrn_b_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrlrn_b_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrlrn_h_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrlrn_h_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern 
__inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsrlrn_w_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsrlrn_w_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrlrn_bu_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrlrn_bu_h((v16u16)_1, (v16u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrlrn_hu_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrlrn_hu_w((v8u32)_1, (v8u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrlrn_wu_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrlrn_wu_d((v4u64)_1, (v4u64)_2); ++} ++ ++#define __lasx_xvfrstpi_b(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvfrstpi_b((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++#define __lasx_xvfrstpi_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvfrstpi_h((v16i16)(_1), (v16i16)(_2), (_3))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfrstp_b(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvfrstp_b((v32i8)_1, (v32i8)_2, (v32i8)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfrstp_h(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvfrstp_h((v16i16)_1, (v16i16)_2, (v16i16)_3); ++} ++ ++#define __lasx_xvshuf4i_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ ++ ((__m256i)__builtin_lasx_xvshuf4i_d((v4i64)(_1), (v4i64)(_2), (_3))) ++ ++#define __lasx_xvbsrl_v(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbsrl_v((v32i8)(_1), (_2))) ++ ++#define __lasx_xvbsll_v(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvbsll_v((v32i8)(_1), (_2))) ++ ++#define __lasx_xvextrins_b(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ ++ ((__m256i)__builtin_lasx_xvextrins_b((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++#define __lasx_xvextrins_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ ++ ((__m256i)__builtin_lasx_xvextrins_h((v16i16)(_1), (v16i16)(_2), (_3))) ++ ++#define __lasx_xvextrins_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ ++ ((__m256i)__builtin_lasx_xvextrins_w((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++#define __lasx_xvextrins_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ ++ ((__m256i)__builtin_lasx_xvextrins_d((v4i64)(_1), (v4i64)(_2), (_3))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmskltz_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvmskltz_b((v32i8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmskltz_h(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvmskltz_h((v16i16)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmskltz_w(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvmskltz_w((v8i32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmskltz_d(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvmskltz_d((v4i64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ 
__lasx_xvsigncov_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsigncov_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsigncov_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsigncov_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsigncov_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsigncov_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsigncov_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsigncov_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfmadd_s(__m256 _1, __m256 _2, __m256 _3) { ++ return (__m256)__builtin_lasx_xvfmadd_s((v8f32)_1, (v8f32)_2, (v8f32)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfmadd_d(__m256d _1, __m256d _2, __m256d _3) { ++ return (__m256d)__builtin_lasx_xvfmadd_d((v4f64)_1, (v4f64)_2, (v4f64)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfmsub_s(__m256 _1, __m256 _2, __m256 _3) { ++ return (__m256)__builtin_lasx_xvfmsub_s((v8f32)_1, (v8f32)_2, (v8f32)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfmsub_d(__m256d _1, __m256d _2, __m256d _3) { ++ return (__m256d)__builtin_lasx_xvfmsub_d((v4f64)_1, (v4f64)_2, (v4f64)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfnmadd_s(__m256 _1, __m256 _2, __m256 _3) { ++ return (__m256)__builtin_lasx_xvfnmadd_s((v8f32)_1, (v8f32)_2, (v8f32)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfnmadd_d(__m256d _1, __m256d _2, __m256d _3) { ++ return (__m256d)__builtin_lasx_xvfnmadd_d((v4f64)_1, (v4f64)_2, (v4f64)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfnmsub_s(__m256 _1, __m256 _2, __m256 _3) { ++ return (__m256)__builtin_lasx_xvfnmsub_s((v8f32)_1, (v8f32)_2, (v8f32)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfnmsub_d(__m256d _1, __m256d _2, __m256d _3) { ++ return (__m256d)__builtin_lasx_xvfnmsub_d((v4f64)_1, (v4f64)_2, (v4f64)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrne_w_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrne_w_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrne_l_d(__m256d _1) { ++ return (__m256i)__builtin_lasx_xvftintrne_l_d((v4f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrp_w_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrp_w_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrp_l_d(__m256d _1) { ++ return (__m256i)__builtin_lasx_xvftintrp_l_d((v4f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, 
__always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrm_w_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrm_w_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrm_l_d(__m256d _1) { ++ return (__m256i)__builtin_lasx_xvftintrm_l_d((v4f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftint_w_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvftint_w_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvffint_s_l(__m256i _1, __m256i _2) { ++ return (__m256)__builtin_lasx_xvffint_s_l((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrz_w_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvftintrz_w_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrp_w_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvftintrp_w_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrm_w_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvftintrm_w_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrne_w_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvftintrne_w_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftinth_l_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftinth_l_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintl_l_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintl_l_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvffinth_d_w(__m256i _1) { ++ return (__m256d)__builtin_lasx_xvffinth_d_w((v8i32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvffintl_d_w(__m256i _1) { ++ return (__m256d)__builtin_lasx_xvffintl_d_w((v8i32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrzh_l_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrzh_l_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrzl_l_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrzl_l_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrph_l_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrph_l_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrpl_l_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrpl_l_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrmh_l_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrmh_l_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, 
__always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrml_l_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrml_l_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrneh_l_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrneh_l_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvftintrnel_l_s(__m256 _1) { ++ return (__m256i)__builtin_lasx_xvftintrnel_l_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfrintrne_s(__m256 _1) { ++ return (__m256)__builtin_lasx_xvfrintrne_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfrintrne_d(__m256d _1) { ++ return (__m256d)__builtin_lasx_xvfrintrne_d((v4f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfrintrz_s(__m256 _1) { ++ return (__m256)__builtin_lasx_xvfrintrz_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfrintrz_d(__m256d _1) { ++ return (__m256d)__builtin_lasx_xvfrintrz_d((v4f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfrintrp_s(__m256 _1) { ++ return (__m256)__builtin_lasx_xvfrintrp_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfrintrp_d(__m256d _1) { ++ return (__m256d)__builtin_lasx_xvfrintrp_d((v4f64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256 ++ __lasx_xvfrintrm_s(__m256 _1) { ++ return (__m256)__builtin_lasx_xvfrintrm_s((v8f32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256d ++ __lasx_xvfrintrm_d(__m256d _1) { ++ return (__m256d)__builtin_lasx_xvfrintrm_d((v4f64)_1); ++} ++ ++#define __lasx_xvld(/*void **/ _1, /*si12*/ _2) \ ++ ((__m256i)__builtin_lasx_xvld((void const *)(_1), (_2))) ++ ++#define __lasx_xvst(/*__m256i*/ _1, /*void **/ _2, /*si12*/ _3) \ ++ ((void)__builtin_lasx_xvst((v32i8)(_1), (void *)(_2), (_3))) ++ ++#define __lasx_xvstelm_b(/*__m256i*/ _1, /*void **/ _2, /*si8*/ _3, \ ++ /*idx*/ _4) \ ++ ((void)__builtin_lasx_xvstelm_b((v32i8)(_1), (void *)(_2), (_3), (_4))) ++ ++#define __lasx_xvstelm_h(/*__m256i*/ _1, /*void **/ _2, /*si8*/ _3, \ ++ /*idx*/ _4) \ ++ ((void)__builtin_lasx_xvstelm_h((v16i16)(_1), (void *)(_2), (_3), (_4))) ++ ++#define __lasx_xvstelm_w(/*__m256i*/ _1, /*void **/ _2, /*si8*/ _3, \ ++ /*idx*/ _4) \ ++ ((void)__builtin_lasx_xvstelm_w((v8i32)(_1), (void *)(_2), (_3), (_4))) ++ ++#define __lasx_xvstelm_d(/*__m256i*/ _1, /*void **/ _2, /*si8*/ _3, \ ++ /*idx*/ _4) \ ++ ((void)__builtin_lasx_xvstelm_d((v4i64)(_1), (void *)(_2), (_3), (_4))) ++ ++#define __lasx_xvinsve0_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui3*/ _3) \ ++ ((__m256i)__builtin_lasx_xvinsve0_w((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++#define __lasx_xvinsve0_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui2*/ _3) \ ++ ((__m256i)__builtin_lasx_xvinsve0_d((v4i64)(_1), (v4i64)(_2), (_3))) ++ ++#define __lasx_xvpickve_w(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvpickve_w((v8i32)(_1), (_2))) ++ ++#define __lasx_xvpickve_d(/*__m256i*/ _1, /*ui2*/ _2) \ ++ 
((__m256i)__builtin_lasx_xvpickve_d((v4i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrlrn_b_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrlrn_b_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrlrn_h_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrlrn_h_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrlrn_w_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrlrn_w_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrln_b_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrln_b_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrln_h_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrln_h_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvssrln_w_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvssrln_w_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvorn_v(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvorn_v((v32i8)_1, (v32i8)_2); ++} ++ ++#define __lasx_xvldi(/*i13*/ _1) ((__m256i)__builtin_lasx_xvldi((_1))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvldx(void const *_1, long int _2) { ++ return (__m256i)__builtin_lasx_xvldx((void const *)_1, (long int)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) void ++ __lasx_xvstx(__m256i _1, void *_2, long int _3) { ++ return (void)__builtin_lasx_xvstx((v32i8)_1, (void *)_2, (long int)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvextl_qu_du(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvextl_qu_du((v4u64)_1); ++} ++ ++#define __lasx_xvinsgr2vr_w(/*__m256i*/ _1, /*int*/ _2, /*ui3*/ _3) \ ++ ((__m256i)__builtin_lasx_xvinsgr2vr_w((v8i32)(_1), (int)(_2), (_3))) ++ ++#define __lasx_xvinsgr2vr_d(/*__m256i*/ _1, /*long int*/ _2, /*ui2*/ _3) \ ++ ((__m256i)__builtin_lasx_xvinsgr2vr_d((v4i64)(_1), (long int)(_2), (_3))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplve0_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvreplve0_b((v32i8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplve0_h(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvreplve0_h((v16i16)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplve0_w(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvreplve0_w((v8i32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplve0_d(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvreplve0_d((v4i64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvreplve0_q(__m256i _1) { ++ 
return (__m256i)__builtin_lasx_xvreplve0_q((v32i8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_h_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_h_b((v32i8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_w_h(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_w_h((v16i16)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_d_w(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_d_w((v8i32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_w_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_w_b((v32i8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_d_h(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_d_h((v16i16)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_d_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_d_b((v32i8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_hu_bu(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_hu_bu((v32i8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_wu_hu(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_wu_hu((v16i16)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_du_wu(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_du_wu((v8i32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_wu_bu(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_wu_bu((v32i8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_du_hu(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_du_hu((v16i16)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_vext2xv_du_bu(__m256i _1) { ++ return (__m256i)__builtin_lasx_vext2xv_du_bu((v32i8)_1); ++} ++ ++#define __lasx_xvpermi_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui8*/ _3) \ ++ ((__m256i)__builtin_lasx_xvpermi_q((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++#define __lasx_xvpermi_d(/*__m256i*/ _1, /*ui8*/ _2) \ ++ ((__m256i)__builtin_lasx_xvpermi_d((v4i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvperm_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvperm_w((v8i32)_1, (v8i32)_2); ++} ++ ++#define __lasx_xvldrepl_b(/*void **/ _1, /*si12*/ _2) \ ++ ((__m256i)__builtin_lasx_xvldrepl_b((void const *)(_1), (_2))) ++ ++#define __lasx_xvldrepl_h(/*void **/ _1, /*si11*/ _2) \ ++ ((__m256i)__builtin_lasx_xvldrepl_h((void const *)(_1), (_2))) ++ ++#define __lasx_xvldrepl_w(/*void **/ _1, /*si10*/ _2) \ ++ ((__m256i)__builtin_lasx_xvldrepl_w((void const *)(_1), (_2))) ++ ++#define __lasx_xvldrepl_d(/*void **/ _1, /*si9*/ _2) \ ++ ((__m256i)__builtin_lasx_xvldrepl_d((void const *)(_1), (_2))) ++ ++#define __lasx_xvpickve2gr_w(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((int)__builtin_lasx_xvpickve2gr_w((v8i32)(_1), 
(_2))) ++ ++#define __lasx_xvpickve2gr_wu(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((unsigned int)__builtin_lasx_xvpickve2gr_wu((v8i32)(_1), (_2))) ++ ++#define __lasx_xvpickve2gr_d(/*__m256i*/ _1, /*ui2*/ _2) \ ++ ((long int)__builtin_lasx_xvpickve2gr_d((v4i64)(_1), (_2))) ++ ++#define __lasx_xvpickve2gr_du(/*__m256i*/ _1, /*ui2*/ _2) \ ++ ((unsigned long int)__builtin_lasx_xvpickve2gr_du((v4i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_q_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_q_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_d_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_d_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_w_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_w_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_h_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_h_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_q_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_q_du((v4u64)_1, (v4u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_d_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_d_wu((v8u32)_1, (v8u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_w_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_w_hu((v16u16)_1, (v16u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_h_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_h_bu((v32u8)_1, (v32u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwev_q_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwev_q_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwev_d_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwev_d_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwev_w_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwev_w_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwev_h_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwev_h_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwev_q_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwev_q_du((v4u64)_1, (v4u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwev_d_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwev_d_wu((v8u32)_1, (v8u32)_2); ++} ++ ++extern __inline ++ 
__attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwev_w_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwev_w_hu((v16u16)_1, (v16u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwev_h_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwev_h_bu((v32u8)_1, (v32u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_q_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_q_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_d_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_d_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_w_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_w_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_h_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_h_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_q_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_q_du((v4u64)_1, (v4u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_d_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_d_wu((v8u32)_1, (v8u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_w_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_w_hu((v16u16)_1, (v16u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_h_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_h_bu((v32u8)_1, (v32u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_q_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_q_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_d_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_d_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_w_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_w_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_h_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_h_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_q_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_q_du((v4u64)_1, (v4u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_d_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_d_wu((v8u32)_1, (v8u32)_2); ++} ++ 
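/*
 * Illustrative usage sketch (hedged, not part of the patch above): one way the
 * LASX wrappers defined in this header -- __lasx_xvld, __lasx_xvmadd_w and
 * __lasx_xvst -- might be exercised on a LoongArch target built with -mlasx.
 * The function name madd_rows() and the calling code are hypothetical; only
 * the intrinsic names and their argument shapes come from the header itself.
 */
#include <lasxintrin.h>

/* acc[i] += a[i] * b[i], eight 32-bit lanes per iteration. */
static void madd_rows(int *acc, const int *a, const int *b, int n) {
  for (int i = 0; i + 8 <= n; i += 8) {
    __m256i va = __lasx_xvld(a + i, 0);   /* 256-bit load, immediate offset 0 */
    __m256i vb = __lasx_xvld(b + i, 0);
    __m256i vc = __lasx_xvld(acc + i, 0);
    vc = __lasx_xvmadd_w(vc, va, vb);     /* per-lane vc += va * vb */
    __lasx_xvst(vc, acc + i, 0);          /* store the accumulated lanes back */
  }
}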
++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_w_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_w_hu((v16u16)_1, (v16u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_h_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_h_bu((v32u8)_1, (v32u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwod_q_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwod_q_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwod_d_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwod_d_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwod_w_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwod_w_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwod_h_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwod_h_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwod_q_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwod_q_du((v4u64)_1, (v4u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwod_d_wu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwod_d_wu((v8u32)_1, (v8u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwod_w_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwod_w_hu((v16u16)_1, (v16u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsubwod_h_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsubwod_h_bu((v32u8)_1, (v32u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_q_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_q_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_d_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_d_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_w_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_w_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_h_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_h_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_q_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_q_du((v4u64)_1, (v4u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_d_wu(__m256i _1, __m256i _2) { ++ return 
(__m256i)__builtin_lasx_xvmulwod_d_wu((v8u32)_1, (v8u32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_w_hu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_w_hu((v16u16)_1, (v16u16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_h_bu(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_h_bu((v32u8)_1, (v32u8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_d_wu_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_d_wu_w((v8u32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_w_hu_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_w_hu_h((v16u16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_h_bu_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_h_bu_b((v32u8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_d_wu_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_d_wu_w((v8u32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_w_hu_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_w_hu_h((v16u16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_h_bu_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_h_bu_b((v32u8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_d_wu_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_d_wu_w((v8u32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_w_hu_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_w_hu_h((v16u16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_h_bu_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_h_bu_b((v32u8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_d_wu_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_d_wu_w((v8u32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_w_hu_h(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_w_hu_h((v16u16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_h_bu_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_h_bu_b((v32u8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhaddw_q_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhaddw_q_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, 
__artificial__)) __m256i ++ __lasx_xvhaddw_qu_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhaddw_qu_du((v4u64)_1, (v4u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhsubw_q_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhsubw_q_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvhsubw_qu_du(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvhsubw_qu_du((v4u64)_1, (v4u64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_q_d(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_q_d((v4i64)_1, (v4i64)_2, (v4i64)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_d_w(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_d_w((v4i64)_1, (v8i32)_2, (v8i32)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_w_h(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_w_h((v8i32)_1, (v16i16)_2, ++ (v16i16)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_h_b(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_h_b((v16i16)_1, (v32i8)_2, ++ (v32i8)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_q_du(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_q_du((v4u64)_1, (v4u64)_2, ++ (v4u64)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_d_wu(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_d_wu((v4u64)_1, (v8u32)_2, ++ (v8u32)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_w_hu(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_w_hu((v8u32)_1, (v16u16)_2, ++ (v16u16)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_h_bu(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_h_bu((v16u16)_1, (v32u8)_2, ++ (v32u8)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_q_d(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_q_d((v4i64)_1, (v4i64)_2, (v4i64)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_d_w(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_d_w((v4i64)_1, (v8i32)_2, (v8i32)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_w_h(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_w_h((v8i32)_1, (v16i16)_2, ++ (v16i16)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_h_b(__m256i _1, __m256i _2, __m256i _3) { ++ return 
(__m256i)__builtin_lasx_xvmaddwod_h_b((v16i16)_1, (v32i8)_2, ++ (v32i8)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_q_du(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_q_du((v4u64)_1, (v4u64)_2, ++ (v4u64)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_d_wu(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_d_wu((v4u64)_1, (v8u32)_2, ++ (v8u32)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_w_hu(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_w_hu((v8u32)_1, (v16u16)_2, ++ (v16u16)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_h_bu(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_h_bu((v16u16)_1, (v32u8)_2, ++ (v32u8)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_q_du_d(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_q_du_d((v4i64)_1, (v4u64)_2, ++ (v4i64)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_d_wu_w(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_d_wu_w((v4i64)_1, (v8u32)_2, ++ (v8i32)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_w_hu_h(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_w_hu_h((v8i32)_1, (v16u16)_2, ++ (v16i16)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwev_h_bu_b(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwev_h_bu_b((v16i16)_1, (v32u8)_2, ++ (v32i8)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_q_du_d(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_q_du_d((v4i64)_1, (v4u64)_2, ++ (v4i64)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_d_wu_w(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_d_wu_w((v4i64)_1, (v8u32)_2, ++ (v8i32)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_w_hu_h(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_w_hu_h((v8i32)_1, (v16u16)_2, ++ (v16i16)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmaddwod_h_bu_b(__m256i _1, __m256i _2, __m256i _3) { ++ return (__m256i)__builtin_lasx_xvmaddwod_h_bu_b((v16i16)_1, (v32u8)_2, ++ (v32i8)_3); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvrotr_b(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvrotr_b((v32i8)_1, (v32i8)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvrotr_h(__m256i _1, __m256i _2) { ++ return 
(__m256i)__builtin_lasx_xvrotr_h((v16i16)_1, (v16i16)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvrotr_w(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvrotr_w((v8i32)_1, (v8i32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvrotr_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvrotr_d((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvadd_q(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvadd_q((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvsub_q(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvsub_q((v4i64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwev_q_du_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwev_q_du_d((v4u64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvaddwod_q_du_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvaddwod_q_du_d((v4u64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwev_q_du_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwev_q_du_d((v4u64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmulwod_q_du_d(__m256i _1, __m256i _2) { ++ return (__m256i)__builtin_lasx_xvmulwod_q_du_d((v4u64)_1, (v4i64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmskgez_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvmskgez_b((v32i8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvmsknz_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvmsknz_b((v32i8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvexth_h_b(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvexth_h_b((v32i8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvexth_w_h(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvexth_w_h((v16i16)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvexth_d_w(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvexth_d_w((v8i32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvexth_q_d(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvexth_q_d((v4i64)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvexth_hu_bu(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvexth_hu_bu((v32u8)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvexth_wu_hu(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvexth_wu_hu((v16u16)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvexth_du_wu(__m256i _1) { ++ return 
(__m256i)__builtin_lasx_xvexth_du_wu((v8u32)_1); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvexth_qu_du(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvexth_qu_du((v4u64)_1); ++} ++ ++#define __lasx_xvrotri_b(/*__m256i*/ _1, /*ui3*/ _2) \ ++ ((__m256i)__builtin_lasx_xvrotri_b((v32i8)(_1), (_2))) ++ ++#define __lasx_xvrotri_h(/*__m256i*/ _1, /*ui4*/ _2) \ ++ ((__m256i)__builtin_lasx_xvrotri_h((v16i16)(_1), (_2))) ++ ++#define __lasx_xvrotri_w(/*__m256i*/ _1, /*ui5*/ _2) \ ++ ((__m256i)__builtin_lasx_xvrotri_w((v8i32)(_1), (_2))) ++ ++#define __lasx_xvrotri_d(/*__m256i*/ _1, /*ui6*/ _2) \ ++ ((__m256i)__builtin_lasx_xvrotri_d((v4i64)(_1), (_2))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvextl_q_d(__m256i _1) { ++ return (__m256i)__builtin_lasx_xvextl_q_d((v4i64)_1); ++} ++ ++#define __lasx_xvsrlni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrlni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++#define __lasx_xvsrlni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrlni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) ++ ++#define __lasx_xvsrlni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrlni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++#define __lasx_xvsrlni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrlni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) ++ ++#define __lasx_xvsrlrni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrlrni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++#define __lasx_xvsrlrni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrlrni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) ++ ++#define __lasx_xvsrlrni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrlrni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++#define __lasx_xvsrlrni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrlrni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) ++ ++#define __lasx_xvssrlni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++#define __lasx_xvssrlni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) ++ ++#define __lasx_xvssrlni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++#define __lasx_xvssrlni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) ++ ++#define __lasx_xvssrlni_bu_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlni_bu_h((v32u8)(_1), (v32i8)(_2), (_3))) ++ ++#define __lasx_xvssrlni_hu_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlni_hu_w((v16u16)(_1), (v16i16)(_2), (_3))) ++ ++#define __lasx_xvssrlni_wu_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlni_wu_d((v8u32)(_1), (v8i32)(_2), (_3))) ++ ++#define __lasx_xvssrlni_du_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlni_du_q((v4u64)(_1), (v4i64)(_2), (_3))) ++ ++#define __lasx_xvssrlrni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlrni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++#define 
__lasx_xvssrlrni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlrni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) ++ ++#define __lasx_xvssrlrni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlrni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++#define __lasx_xvssrlrni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlrni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) ++ ++#define __lasx_xvssrlrni_bu_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlrni_bu_h((v32u8)(_1), (v32i8)(_2), (_3))) ++ ++#define __lasx_xvssrlrni_hu_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlrni_hu_w((v16u16)(_1), (v16i16)(_2), (_3))) ++ ++#define __lasx_xvssrlrni_wu_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlrni_wu_d((v8u32)(_1), (v8i32)(_2), (_3))) ++ ++#define __lasx_xvssrlrni_du_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrlrni_du_q((v4u64)(_1), (v4i64)(_2), (_3))) ++ ++#define __lasx_xvsrani_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrani_b_h((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++#define __lasx_xvsrani_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrani_h_w((v16i16)(_1), (v16i16)(_2), (_3))) ++ ++#define __lasx_xvsrani_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrani_w_d((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++#define __lasx_xvsrani_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrani_d_q((v4i64)(_1), (v4i64)(_2), (_3))) ++ ++#define __lasx_xvsrarni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrarni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++#define __lasx_xvsrarni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrarni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) ++ ++#define __lasx_xvsrarni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrarni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++#define __lasx_xvsrarni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvsrarni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) ++ ++#define __lasx_xvssrani_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrani_b_h((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++#define __lasx_xvssrani_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrani_h_w((v16i16)(_1), (v16i16)(_2), (_3))) ++ ++#define __lasx_xvssrani_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrani_w_d((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++#define __lasx_xvssrani_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrani_d_q((v4i64)(_1), (v4i64)(_2), (_3))) ++ ++#define __lasx_xvssrani_bu_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrani_bu_h((v32u8)(_1), (v32i8)(_2), (_3))) ++ ++#define __lasx_xvssrani_hu_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrani_hu_w((v16u16)(_1), (v16i16)(_2), (_3))) ++ ++#define __lasx_xvssrani_wu_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrani_wu_d((v8u32)(_1), (v8i32)(_2), (_3))) ++ ++#define __lasx_xvssrani_du_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrani_du_q((v4u64)(_1), (v4i64)(_2), (_3))) ++ ++#define 
__lasx_xvssrarni_b_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrarni_b_h((v32i8)(_1), (v32i8)(_2), (_3))) ++ ++#define __lasx_xvssrarni_h_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrarni_h_w((v16i16)(_1), (v16i16)(_2), (_3))) ++ ++#define __lasx_xvssrarni_w_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrarni_w_d((v8i32)(_1), (v8i32)(_2), (_3))) ++ ++#define __lasx_xvssrarni_d_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrarni_d_q((v4i64)(_1), (v4i64)(_2), (_3))) ++ ++#define __lasx_xvssrarni_bu_h(/*__m256i*/ _1, /*__m256i*/ _2, /*ui4*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrarni_bu_h((v32u8)(_1), (v32i8)(_2), (_3))) ++ ++#define __lasx_xvssrarni_hu_w(/*__m256i*/ _1, /*__m256i*/ _2, /*ui5*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrarni_hu_w((v16u16)(_1), (v16i16)(_2), (_3))) ++ ++#define __lasx_xvssrarni_wu_d(/*__m256i*/ _1, /*__m256i*/ _2, /*ui6*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrarni_wu_d((v8u32)(_1), (v8i32)(_2), (_3))) ++ ++#define __lasx_xvssrarni_du_q(/*__m256i*/ _1, /*__m256i*/ _2, /*ui7*/ _3) \ ++ ((__m256i)__builtin_lasx_xvssrarni_du_q((v4u64)(_1), (v4i64)(_2), (_3))) ++ ++#define __lasx_xbnz_b(/*__m256i*/ _1) ((int)__builtin_lasx_xbnz_b((v32u8)(_1))) ++ ++#define __lasx_xbnz_d(/*__m256i*/ _1) ((int)__builtin_lasx_xbnz_d((v4u64)(_1))) ++ ++#define __lasx_xbnz_h(/*__m256i*/ _1) ((int)__builtin_lasx_xbnz_h((v16u16)(_1))) ++ ++#define __lasx_xbnz_v(/*__m256i*/ _1) ((int)__builtin_lasx_xbnz_v((v32u8)(_1))) ++ ++#define __lasx_xbnz_w(/*__m256i*/ _1) ((int)__builtin_lasx_xbnz_w((v8u32)(_1))) ++ ++#define __lasx_xbz_b(/*__m256i*/ _1) ((int)__builtin_lasx_xbz_b((v32u8)(_1))) ++ ++#define __lasx_xbz_d(/*__m256i*/ _1) ((int)__builtin_lasx_xbz_d((v4u64)(_1))) ++ ++#define __lasx_xbz_h(/*__m256i*/ _1) ((int)__builtin_lasx_xbz_h((v16u16)(_1))) ++ ++#define __lasx_xbz_v(/*__m256i*/ _1) ((int)__builtin_lasx_xbz_v((v32u8)(_1))) ++ ++#define __lasx_xbz_w(/*__m256i*/ _1) ((int)__builtin_lasx_xbz_w((v8u32)(_1))) ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_caf_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_caf_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_caf_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_caf_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_ceq_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_ceq_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_ceq_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_ceq_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cle_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cle_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cle_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cle_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_clt_d(__m256d _1, __m256d _2) { 
++ return (__m256i)__builtin_lasx_xvfcmp_clt_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_clt_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_clt_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cne_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cne_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cne_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cne_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cor_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cor_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cor_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cor_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cueq_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cueq_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cueq_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cueq_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cule_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cule_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cule_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cule_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cult_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cult_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cult_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cult_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cun_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cun_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cune_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cune_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cune_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cune_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_cun_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_cun_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_saf_d(__m256d _1, __m256d _2) { ++ 
return (__m256i)__builtin_lasx_xvfcmp_saf_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_saf_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_saf_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_seq_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_seq_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_seq_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_seq_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sle_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sle_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sle_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sle_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_slt_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_slt_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_slt_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_slt_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sne_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sne_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sne_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sne_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sor_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sor_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sor_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sor_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sueq_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sueq_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sueq_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sueq_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sule_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sule_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sule_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sule_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sult_d(__m256d _1, __m256d _2) { ++ return 
(__m256i)__builtin_lasx_xvfcmp_sult_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sult_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sult_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sun_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sun_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sune_d(__m256d _1, __m256d _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sune_d((v4f64)_1, (v4f64)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sune_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sune_s((v8f32)_1, (v8f32)_2); ++} ++ ++extern __inline ++ __attribute__((__gnu_inline__, __always_inline__, __artificial__)) __m256i ++ __lasx_xvfcmp_sun_s(__m256 _1, __m256 _2) { ++ return (__m256i)__builtin_lasx_xvfcmp_sun_s((v8f32)_1, (v8f32)_2); ++} ++ ++#define __lasx_xvpickve_d_f(/*__m256d*/ _1, /*ui2*/ _2) \ ++ ((__m256d)__builtin_lasx_xvpickve_d_f((v4f64)(_1), (_2))) ++ ++#define __lasx_xvpickve_w_f(/*__m256*/ _1, /*ui3*/ _2) \ ++ ((__m256)__builtin_lasx_xvpickve_w_f((v8f32)(_1), (_2))) ++ ++#define __lasx_xvrepli_b(/*si10*/ _1) ((__m256i)__builtin_lasx_xvrepli_b((_1))) ++ ++#define __lasx_xvrepli_d(/*si10*/ _1) ((__m256i)__builtin_lasx_xvrepli_d((_1))) ++ ++#define __lasx_xvrepli_h(/*si10*/ _1) ((__m256i)__builtin_lasx_xvrepli_h((_1))) ++ ++#define __lasx_xvrepli_w(/*si10*/ _1) ((__m256i)__builtin_lasx_xvrepli_w((_1))) ++ ++#endif /* defined(__loongarch_asx). */ ++#endif /* _LOONGSON_ASXINTRIN_H. */ +diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp +index d1b015502725..5ee20554c4cf 100644 +--- a/clang/lib/Sema/SemaChecking.cpp ++++ b/clang/lib/Sema/SemaChecking.cpp +@@ -4082,6 +4082,233 @@ bool Sema::CheckLoongArchBuiltinFunctionCall(const TargetInfo &TI, + case LoongArch::BI__builtin_lsx_vrepli_w: + case LoongArch::BI__builtin_lsx_vrepli_d: + return SemaBuiltinConstantArgRange(TheCall, 0, -512, 511); ++ ++ // LASX intrinsics. 
++ case LoongArch::BI__builtin_lasx_xvbitclri_b: ++ case LoongArch::BI__builtin_lasx_xvbitrevi_b: ++ case LoongArch::BI__builtin_lasx_xvbitseti_b: ++ case LoongArch::BI__builtin_lasx_xvsat_b: ++ case LoongArch::BI__builtin_lasx_xvsat_bu: ++ case LoongArch::BI__builtin_lasx_xvslli_b: ++ case LoongArch::BI__builtin_lasx_xvsrai_b: ++ case LoongArch::BI__builtin_lasx_xvsrari_b: ++ case LoongArch::BI__builtin_lasx_xvsrli_b: ++ case LoongArch::BI__builtin_lasx_xvsllwil_h_b: ++ case LoongArch::BI__builtin_lasx_xvsllwil_hu_bu: ++ case LoongArch::BI__builtin_lasx_xvrotri_b: ++ case LoongArch::BI__builtin_lasx_xvsrlri_b: ++ return SemaBuiltinConstantArgRange(TheCall, 1, 0, 7); ++ case LoongArch::BI__builtin_lasx_xvbitclri_h: ++ case LoongArch::BI__builtin_lasx_xvbitrevi_h: ++ case LoongArch::BI__builtin_lasx_xvbitseti_h: ++ case LoongArch::BI__builtin_lasx_xvsat_h: ++ case LoongArch::BI__builtin_lasx_xvsat_hu: ++ case LoongArch::BI__builtin_lasx_xvslli_h: ++ case LoongArch::BI__builtin_lasx_xvsrai_h: ++ case LoongArch::BI__builtin_lasx_xvsrari_h: ++ case LoongArch::BI__builtin_lasx_xvsrli_h: ++ case LoongArch::BI__builtin_lasx_xvsllwil_w_h: ++ case LoongArch::BI__builtin_lasx_xvsllwil_wu_hu: ++ case LoongArch::BI__builtin_lasx_xvrotri_h: ++ case LoongArch::BI__builtin_lasx_xvsrlri_h: ++ return SemaBuiltinConstantArgRange(TheCall, 1, 0, 15); ++ case LoongArch::BI__builtin_lasx_xvssrarni_b_h: ++ case LoongArch::BI__builtin_lasx_xvssrarni_bu_h: ++ case LoongArch::BI__builtin_lasx_xvssrani_b_h: ++ case LoongArch::BI__builtin_lasx_xvssrani_bu_h: ++ case LoongArch::BI__builtin_lasx_xvsrarni_b_h: ++ case LoongArch::BI__builtin_lasx_xvsrlni_b_h: ++ case LoongArch::BI__builtin_lasx_xvsrlrni_b_h: ++ case LoongArch::BI__builtin_lasx_xvssrlni_b_h: ++ case LoongArch::BI__builtin_lasx_xvssrlni_bu_h: ++ case LoongArch::BI__builtin_lasx_xvssrlrni_b_h: ++ case LoongArch::BI__builtin_lasx_xvssrlrni_bu_h: ++ case LoongArch::BI__builtin_lasx_xvsrani_b_h: ++ return SemaBuiltinConstantArgRange(TheCall, 2, 0, 15); ++ case LoongArch::BI__builtin_lasx_xvslei_bu: ++ case LoongArch::BI__builtin_lasx_xvslei_hu: ++ case LoongArch::BI__builtin_lasx_xvslei_wu: ++ case LoongArch::BI__builtin_lasx_xvslei_du: ++ case LoongArch::BI__builtin_lasx_xvslti_bu: ++ case LoongArch::BI__builtin_lasx_xvslti_hu: ++ case LoongArch::BI__builtin_lasx_xvslti_wu: ++ case LoongArch::BI__builtin_lasx_xvslti_du: ++ case LoongArch::BI__builtin_lasx_xvmaxi_bu: ++ case LoongArch::BI__builtin_lasx_xvmaxi_hu: ++ case LoongArch::BI__builtin_lasx_xvmaxi_wu: ++ case LoongArch::BI__builtin_lasx_xvmaxi_du: ++ case LoongArch::BI__builtin_lasx_xvmini_bu: ++ case LoongArch::BI__builtin_lasx_xvmini_hu: ++ case LoongArch::BI__builtin_lasx_xvmini_wu: ++ case LoongArch::BI__builtin_lasx_xvmini_du: ++ case LoongArch::BI__builtin_lasx_xvaddi_bu: ++ case LoongArch::BI__builtin_lasx_xvaddi_hu: ++ case LoongArch::BI__builtin_lasx_xvaddi_wu: ++ case LoongArch::BI__builtin_lasx_xvaddi_du: ++ case LoongArch::BI__builtin_lasx_xvbitclri_w: ++ case LoongArch::BI__builtin_lasx_xvbitrevi_w: ++ case LoongArch::BI__builtin_lasx_xvbitseti_w: ++ case LoongArch::BI__builtin_lasx_xvsat_w: ++ case LoongArch::BI__builtin_lasx_xvsat_wu: ++ case LoongArch::BI__builtin_lasx_xvslli_w: ++ case LoongArch::BI__builtin_lasx_xvsrai_w: ++ case LoongArch::BI__builtin_lasx_xvsrari_w: ++ case LoongArch::BI__builtin_lasx_xvsrli_w: ++ case LoongArch::BI__builtin_lasx_xvsllwil_d_w: ++ case LoongArch::BI__builtin_lasx_xvsllwil_du_wu: ++ case LoongArch::BI__builtin_lasx_xvsrlri_w: ++ case 
LoongArch::BI__builtin_lasx_xvrotri_w: ++ case LoongArch::BI__builtin_lasx_xvsubi_bu: ++ case LoongArch::BI__builtin_lasx_xvsubi_hu: ++ case LoongArch::BI__builtin_lasx_xvsubi_wu: ++ case LoongArch::BI__builtin_lasx_xvsubi_du: ++ case LoongArch::BI__builtin_lasx_xvbsrl_v: ++ case LoongArch::BI__builtin_lasx_xvbsll_v: ++ return SemaBuiltinConstantArgRange(TheCall, 1, 0, 31); ++ case LoongArch::BI__builtin_lasx_xvssrarni_h_w: ++ case LoongArch::BI__builtin_lasx_xvssrarni_hu_w: ++ case LoongArch::BI__builtin_lasx_xvssrani_h_w: ++ case LoongArch::BI__builtin_lasx_xvssrani_hu_w: ++ case LoongArch::BI__builtin_lasx_xvsrarni_h_w: ++ case LoongArch::BI__builtin_lasx_xvsrani_h_w: ++ case LoongArch::BI__builtin_lasx_xvfrstpi_b: ++ case LoongArch::BI__builtin_lasx_xvfrstpi_h: ++ case LoongArch::BI__builtin_lasx_xvsrlni_h_w: ++ case LoongArch::BI__builtin_lasx_xvsrlrni_h_w: ++ case LoongArch::BI__builtin_lasx_xvssrlni_h_w: ++ case LoongArch::BI__builtin_lasx_xvssrlni_hu_w: ++ case LoongArch::BI__builtin_lasx_xvssrlrni_h_w: ++ case LoongArch::BI__builtin_lasx_xvssrlrni_hu_w: ++ return SemaBuiltinConstantArgRange(TheCall, 2, 0, 31); ++ case LoongArch::BI__builtin_lasx_xvbitclri_d: ++ case LoongArch::BI__builtin_lasx_xvbitrevi_d: ++ case LoongArch::BI__builtin_lasx_xvbitseti_d: ++ case LoongArch::BI__builtin_lasx_xvsat_d: ++ case LoongArch::BI__builtin_lasx_xvsat_du: ++ case LoongArch::BI__builtin_lasx_xvslli_d: ++ case LoongArch::BI__builtin_lasx_xvsrai_d: ++ case LoongArch::BI__builtin_lasx_xvsrli_d: ++ case LoongArch::BI__builtin_lasx_xvsrari_d: ++ case LoongArch::BI__builtin_lasx_xvrotri_d: ++ case LoongArch::BI__builtin_lasx_xvsrlri_d: ++ return SemaBuiltinConstantArgRange(TheCall, 1, 0, 63); ++ case LoongArch::BI__builtin_lasx_xvssrarni_w_d: ++ case LoongArch::BI__builtin_lasx_xvssrarni_wu_d: ++ case LoongArch::BI__builtin_lasx_xvssrani_w_d: ++ case LoongArch::BI__builtin_lasx_xvssrani_wu_d: ++ case LoongArch::BI__builtin_lasx_xvsrarni_w_d: ++ case LoongArch::BI__builtin_lasx_xvsrlni_w_d: ++ case LoongArch::BI__builtin_lasx_xvsrlrni_w_d: ++ case LoongArch::BI__builtin_lasx_xvssrlni_w_d: ++ case LoongArch::BI__builtin_lasx_xvssrlni_wu_d: ++ case LoongArch::BI__builtin_lasx_xvssrlrni_w_d: ++ case LoongArch::BI__builtin_lasx_xvssrlrni_wu_d: ++ case LoongArch::BI__builtin_lasx_xvsrani_w_d: ++ return SemaBuiltinConstantArgRange(TheCall, 2, 0, 63); ++ case LoongArch::BI__builtin_lasx_xvssrarni_d_q: ++ case LoongArch::BI__builtin_lasx_xvssrarni_du_q: ++ case LoongArch::BI__builtin_lasx_xvssrani_d_q: ++ case LoongArch::BI__builtin_lasx_xvssrani_du_q: ++ case LoongArch::BI__builtin_lasx_xvsrarni_d_q: ++ case LoongArch::BI__builtin_lasx_xvssrlni_d_q: ++ case LoongArch::BI__builtin_lasx_xvssrlni_du_q: ++ case LoongArch::BI__builtin_lasx_xvssrlrni_d_q: ++ case LoongArch::BI__builtin_lasx_xvssrlrni_du_q: ++ case LoongArch::BI__builtin_lasx_xvsrani_d_q: ++ case LoongArch::BI__builtin_lasx_xvsrlni_d_q: ++ case LoongArch::BI__builtin_lasx_xvsrlrni_d_q: ++ return SemaBuiltinConstantArgRange(TheCall, 2, 0, 127); ++ case LoongArch::BI__builtin_lasx_xvseqi_b: ++ case LoongArch::BI__builtin_lasx_xvseqi_h: ++ case LoongArch::BI__builtin_lasx_xvseqi_w: ++ case LoongArch::BI__builtin_lasx_xvseqi_d: ++ case LoongArch::BI__builtin_lasx_xvslti_b: ++ case LoongArch::BI__builtin_lasx_xvslti_h: ++ case LoongArch::BI__builtin_lasx_xvslti_w: ++ case LoongArch::BI__builtin_lasx_xvslti_d: ++ case LoongArch::BI__builtin_lasx_xvslei_b: ++ case LoongArch::BI__builtin_lasx_xvslei_h: ++ case LoongArch::BI__builtin_lasx_xvslei_w: ++ case 
LoongArch::BI__builtin_lasx_xvslei_d: ++ case LoongArch::BI__builtin_lasx_xvmaxi_b: ++ case LoongArch::BI__builtin_lasx_xvmaxi_h: ++ case LoongArch::BI__builtin_lasx_xvmaxi_w: ++ case LoongArch::BI__builtin_lasx_xvmaxi_d: ++ case LoongArch::BI__builtin_lasx_xvmini_b: ++ case LoongArch::BI__builtin_lasx_xvmini_h: ++ case LoongArch::BI__builtin_lasx_xvmini_w: ++ case LoongArch::BI__builtin_lasx_xvmini_d: ++ return SemaBuiltinConstantArgRange(TheCall, 1, -16, 15); ++ case LoongArch::BI__builtin_lasx_xvandi_b: ++ case LoongArch::BI__builtin_lasx_xvnori_b: ++ case LoongArch::BI__builtin_lasx_xvori_b: ++ case LoongArch::BI__builtin_lasx_xvshuf4i_b: ++ case LoongArch::BI__builtin_lasx_xvshuf4i_h: ++ case LoongArch::BI__builtin_lasx_xvshuf4i_w: ++ case LoongArch::BI__builtin_lasx_xvxori_b: ++ case LoongArch::BI__builtin_lasx_xvpermi_d: ++ return SemaBuiltinConstantArgRange(TheCall, 1, 0, 255); ++ case LoongArch::BI__builtin_lasx_xvbitseli_b: ++ case LoongArch::BI__builtin_lasx_xvshuf4i_d: ++ case LoongArch::BI__builtin_lasx_xvextrins_b: ++ case LoongArch::BI__builtin_lasx_xvextrins_h: ++ case LoongArch::BI__builtin_lasx_xvextrins_w: ++ case LoongArch::BI__builtin_lasx_xvextrins_d: ++ case LoongArch::BI__builtin_lasx_xvpermi_q: ++ case LoongArch::BI__builtin_lasx_xvpermi_w: ++ return SemaBuiltinConstantArgRange(TheCall, 2, 0, 255); ++ case LoongArch::BI__builtin_lasx_xvrepl128vei_b: ++ return SemaBuiltinConstantArgRange(TheCall, 1, 0, 15); ++ case LoongArch::BI__builtin_lasx_xvrepl128vei_h: ++ case LoongArch::BI__builtin_lasx_xvpickve2gr_w: ++ case LoongArch::BI__builtin_lasx_xvpickve2gr_wu: ++ case LoongArch::BI__builtin_lasx_xvpickve_w_f: ++ case LoongArch::BI__builtin_lasx_xvpickve_w: ++ return SemaBuiltinConstantArgRange(TheCall, 1, 0, 7); ++ case LoongArch::BI__builtin_lasx_xvinsgr2vr_w: ++ case LoongArch::BI__builtin_lasx_xvinsve0_w: ++ return SemaBuiltinConstantArgRange(TheCall, 2, 0, 7); ++ case LoongArch::BI__builtin_lasx_xvrepl128vei_w: ++ case LoongArch::BI__builtin_lasx_xvpickve2gr_d: ++ case LoongArch::BI__builtin_lasx_xvpickve2gr_du: ++ case LoongArch::BI__builtin_lasx_xvpickve_d_f: ++ case LoongArch::BI__builtin_lasx_xvpickve_d: ++ return SemaBuiltinConstantArgRange(TheCall, 1, 0, 3); ++ case LoongArch::BI__builtin_lasx_xvinsve0_d: ++ case LoongArch::BI__builtin_lasx_xvinsgr2vr_d: ++ return SemaBuiltinConstantArgRange(TheCall, 2, 0, 3); ++ case LoongArch::BI__builtin_lasx_xvstelm_b: ++ return SemaBuiltinConstantArgRange(TheCall, 2, -128, 127) || ++ SemaBuiltinConstantArgRange(TheCall, 3, 0, 31); ++ case LoongArch::BI__builtin_lasx_xvstelm_h: ++ return SemaBuiltinConstantArgRange(TheCall, 2, -256, 254) || ++ SemaBuiltinConstantArgRange(TheCall, 3, 0, 15); ++ case LoongArch::BI__builtin_lasx_xvstelm_w: ++ return SemaBuiltinConstantArgRange(TheCall, 2, -512, 508) || ++ SemaBuiltinConstantArgRange(TheCall, 3, 0, 7); ++ case LoongArch::BI__builtin_lasx_xvstelm_d: ++ return SemaBuiltinConstantArgRange(TheCall, 2, -1024, 1016) || ++ SemaBuiltinConstantArgRange(TheCall, 3, 0, 3); ++ case LoongArch::BI__builtin_lasx_xvrepl128vei_d: ++ return SemaBuiltinConstantArgRange(TheCall, 1, 0, 1); ++ case LoongArch::BI__builtin_lasx_xvldrepl_b: ++ case LoongArch::BI__builtin_lasx_xvld: ++ return SemaBuiltinConstantArgRange(TheCall, 1, -2048, 2047); ++ case LoongArch::BI__builtin_lasx_xvldrepl_h: ++ return SemaBuiltinConstantArgRange(TheCall, 1, -2048, 2046); ++ case LoongArch::BI__builtin_lasx_xvldrepl_w: ++ return SemaBuiltinConstantArgRange(TheCall, 1, -2048, 2044); ++ case 
LoongArch::BI__builtin_lasx_xvldrepl_d:
++    return SemaBuiltinConstantArgRange(TheCall, 1, -2048, 2040);
++  case LoongArch::BI__builtin_lasx_xvst:
++    return SemaBuiltinConstantArgRange(TheCall, 2, -2048, 2047);
++  case LoongArch::BI__builtin_lasx_xvldi:
++    return SemaBuiltinConstantArgRange(TheCall, 0, -4096, 4095);
++  case LoongArch::BI__builtin_lasx_xvrepli_b:
++  case LoongArch::BI__builtin_lasx_xvrepli_h:
++  case LoongArch::BI__builtin_lasx_xvrepli_w:
++  case LoongArch::BI__builtin_lasx_xvrepli_d:
++    return SemaBuiltinConstantArgRange(TheCall, 0, -512, 511);
+   }
+   return false;
+ }
+--
+2.20.1
+
+
+From 7d01676815acf376a054bbe9b10586dbc5f80d49 Mon Sep 17 00:00:00 2001
+From: chenli <chenli@loongson.cn>
+Date: Fri, 27 Oct 2023 15:57:30 +0800
+Subject: [PATCH 6/8] [LoongArch][CodeGen] Add LSX builtin testcases
+
+(cherry picked from commit 673c530837faa5ddb45769ddee01d09e1f73d406)
+---
+ .../LoongArch/lsx/builtin-alias-error.c | 1359 +++++
+ .../CodeGen/LoongArch/lsx/builtin-alias.c | 4451 ++++++++++++++
+ .../CodeGen/LoongArch/lsx/builtin-error.c | 1382 +++++
+ clang/test/CodeGen/LoongArch/lsx/builtin.c | 5193 +++++++++++++++++
+ 4 files changed, 12385 insertions(+)
+ create mode 100644 clang/test/CodeGen/LoongArch/lsx/builtin-alias-error.c
+ create mode 100644 clang/test/CodeGen/LoongArch/lsx/builtin-alias.c
+ create mode 100644 clang/test/CodeGen/LoongArch/lsx/builtin-error.c
+ create mode 100644 clang/test/CodeGen/LoongArch/lsx/builtin.c
+
+diff --git a/clang/test/CodeGen/LoongArch/lsx/builtin-alias-error.c b/clang/test/CodeGen/LoongArch/lsx/builtin-alias-error.c
+new file mode 100644
+index 000000000000..69cf2254fdd7
+--- /dev/null
++++ b/clang/test/CodeGen/LoongArch/lsx/builtin-alias-error.c
+@@ -0,0 +1,1359 @@
++// RUN: %clang_cc1 -triple loongarch64 -target-feature +lsx -verify %s
++
++#include <lsxintrin.h>
++
++v16i8 vslli_b(v16i8 _1, int var) {
++  v16i8 res = __lsx_vslli_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}}
++  res |= __lsx_vslli_b(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
++  res |= __lsx_vslli_b(_1, var); // expected-error {{argument to '__builtin_lsx_vslli_b' must be a constant integer}}
++  return res;
++}
++
++v8i16 vslli_h(v8i16 _1, int var) {
++  v8i16 res = __lsx_vslli_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 15]}}
++  res |= __lsx_vslli_h(_1, 16); // expected-error {{argument value 16 is outside the valid range [0, 15]}}
++  res |= __lsx_vslli_h(_1, var); // expected-error {{argument to '__builtin_lsx_vslli_h' must be a constant integer}}
++  return res;
++}
++
++v4i32 vslli_w(v4i32 _1, int var) {
++  v4i32 res = __lsx_vslli_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 31]}}
++  res |= __lsx_vslli_w(_1, 32); // expected-error {{argument value 32 is outside the valid range [0, 31]}}
++  res |= __lsx_vslli_w(_1, var); // expected-error {{argument to '__builtin_lsx_vslli_w' must be a constant integer}}
++  return res;
++}
++
++v2i64 vslli_d(v2i64 _1, int var) {
++  v2i64 res = __lsx_vslli_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 63]}}
++  res |= __lsx_vslli_d(_1, 64); // expected-error {{argument value 64 is outside the valid range [0, 63]}}
++  res |= __lsx_vslli_d(_1, var); // expected-error {{argument to '__builtin_lsx_vslli_d' must be a constant integer}}
++  return res;
++}
++
++v16i8 vsrai_b(v16i8 _1, int var) {
++  v16i8 res = __lsx_vsrai_b(_1, -1); // expected-error
{{argument value 4294967295 is outside the valid range 0, 7}} ++ res |= __lsx_vsrai_b(_1, 8); // expected-error {{argument value 8 is outside the valid range 0, 7}} ++ res |= __lsx_vsrai_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsrai_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vsrai_h(v8i16 _1, int var) { ++ v8i16 res = __lsx_vsrai_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __lsx_vsrai_h(_1, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __lsx_vsrai_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsrai_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vsrai_w(v4i32 _1, int var) { ++ v4i32 res = __lsx_vsrai_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lsx_vsrai_w(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lsx_vsrai_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsrai_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vsrai_d(v2i64 _1, int var) { ++ v2i64 res = __lsx_vsrai_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= __lsx_vsrai_d(_1, 64); // expected-error {{argument value 64 is outside the valid range 0, 63}} ++ res |= __lsx_vsrai_d(_1, var); // expected-error {{argument to '__builtin_lsx_vsrai_d' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vsrari_b(v16i8 _1, int var) { ++ v16i8 res = __lsx_vsrari_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 7}} ++ res |= __lsx_vsrari_b(_1, 8); // expected-error {{argument value 8 is outside the valid range 0, 7}} ++ res |= __lsx_vsrari_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsrari_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vsrari_h(v8i16 _1, int var) { ++ v8i16 res = __lsx_vsrari_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __lsx_vsrari_h(_1, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __lsx_vsrari_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsrari_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vsrari_w(v4i32 _1, int var) { ++ v4i32 res = __lsx_vsrari_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lsx_vsrari_w(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lsx_vsrari_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsrari_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vsrari_d(v2i64 _1, int var) { ++ v2i64 res = __lsx_vsrari_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= __lsx_vsrari_d(_1, 64); // expected-error {{argument value 64 is outside the valid range 0, 63}} ++ res |= __lsx_vsrari_d(_1, var); // expected-error {{argument to '__builtin_lsx_vsrari_d' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vsrli_b(v16i8 _1, int var) { ++ v16i8 res = __lsx_vsrli_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 7}} ++ res |= __lsx_vsrli_b(_1, 8); // expected-error {{argument value 8 is outside the valid range 0, 7}} ++ res |= __lsx_vsrli_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsrli_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 
vsrli_h(v8i16 _1, int var) { ++ v8i16 res = __lsx_vsrli_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __lsx_vsrli_h(_1, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __lsx_vsrli_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsrli_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vsrli_w(v4i32 _1, int var) { ++ v4i32 res = __lsx_vsrli_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lsx_vsrli_w(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lsx_vsrli_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsrli_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vsrli_d(v2i64 _1, int var) { ++ v2i64 res = __lsx_vsrli_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= __lsx_vsrli_d(_1, 64); // expected-error {{argument value 64 is outside the valid range 0, 63}} ++ res |= __lsx_vsrli_d(_1, var); // expected-error {{argument to '__builtin_lsx_vsrli_d' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vsrlri_b(v16i8 _1, int var) { ++ v16i8 res = __lsx_vsrlri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 7}} ++ res |= __lsx_vsrlri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range 0, 7}} ++ res |= __lsx_vsrlri_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsrlri_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vsrlri_h(v8i16 _1, int var) { ++ v8i16 res = __lsx_vsrlri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __lsx_vsrlri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __lsx_vsrlri_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsrlri_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vsrlri_w(v4i32 _1, int var) { ++ v4i32 res = __lsx_vsrlri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lsx_vsrlri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lsx_vsrlri_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsrlri_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vsrlri_d(v2i64 _1, int var) { ++ v2i64 res = __lsx_vsrlri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= __lsx_vsrlri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range 0, 63}} ++ res |= __lsx_vsrlri_d(_1, var); // expected-error {{argument to '__builtin_lsx_vsrlri_d' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vbitclri_b(v16u8 _1, int var) { ++ v16u8 res = __lsx_vbitclri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 7}} ++ res |= __lsx_vbitclri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range 0, 7}} ++ res |= __lsx_vbitclri_b(_1, var); // expected-error {{argument to '__builtin_lsx_vbitclri_b' must be a constant integer}} ++ return res; ++} ++ ++v8u16 vbitclri_h(v8u16 _1, int var) { ++ v8u16 res = __lsx_vbitclri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __lsx_vbitclri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __lsx_vbitclri_h(_1, var); // 
expected-error {{argument to '__builtin_lsx_vbitclri_h' must be a constant integer}} ++ return res; ++} ++ ++v4u32 vbitclri_w(v4u32 _1, int var) { ++ v4u32 res = __lsx_vbitclri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lsx_vbitclri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lsx_vbitclri_w(_1, var); // expected-error {{argument to '__builtin_lsx_vbitclri_w' must be a constant integer}} ++ return res; ++} ++ ++v2u64 vbitclri_d(v2u64 _1, int var) { ++ v2u64 res = __lsx_vbitclri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= __lsx_vbitclri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range 0, 63}} ++ res |= __lsx_vbitclri_d(_1, var); // expected-error {{argument to '__builtin_lsx_vbitclri_d' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vbitseti_b(v16u8 _1, int var) { ++ v16u8 res = __lsx_vbitseti_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 7}} ++ res |= __lsx_vbitseti_b(_1, 8); // expected-error {{argument value 8 is outside the valid range 0, 7}} ++ res |= __lsx_vbitseti_b(_1, var); // expected-error {{argument to '__builtin_lsx_vbitseti_b' must be a constant integer}} ++ return res; ++} ++ ++v8u16 vbitseti_h(v8u16 _1, int var) { ++ v8u16 res = __lsx_vbitseti_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __lsx_vbitseti_h(_1, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __lsx_vbitseti_h(_1, var); // expected-error {{argument to '__builtin_lsx_vbitseti_h' must be a constant integer}} ++ return res; ++} ++ ++v4u32 vbitseti_w(v4u32 _1, int var) { ++ v4u32 res = __lsx_vbitseti_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lsx_vbitseti_w(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lsx_vbitseti_w(_1, var); // expected-error {{argument to '__builtin_lsx_vbitseti_w' must be a constant integer}} ++ return res; ++} ++ ++v2u64 vbitseti_d(v2u64 _1, int var) { ++ v2u64 res = __lsx_vbitseti_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= __lsx_vbitseti_d(_1, 64); // expected-error {{argument value 64 is outside the valid range 0, 63}} ++ res |= __lsx_vbitseti_d(_1, var); // expected-error {{argument to '__builtin_lsx_vbitseti_d' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vbitrevi_b(v16u8 _1, int var) { ++ v16u8 res = __lsx_vbitrevi_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 7}} ++ res |= __lsx_vbitrevi_b(_1, 8); // expected-error {{argument value 8 is outside the valid range 0, 7}} ++ res |= __lsx_vbitrevi_b(_1, var); // expected-error {{argument to '__builtin_lsx_vbitrevi_b' must be a constant integer}} ++ return res; ++} ++ ++v8u16 vbitrevi_h(v8u16 _1, int var) { ++ v8u16 res = __lsx_vbitrevi_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __lsx_vbitrevi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __lsx_vbitrevi_h(_1, var); // expected-error {{argument to '__builtin_lsx_vbitrevi_h' must be a constant integer}} ++ return res; ++} ++ ++v4u32 vbitrevi_w(v4u32 _1, int var) { ++ v4u32 res = __lsx_vbitrevi_w(_1, -1); // expected-error {{argument 
value 4294967295 is outside the valid range 0, 31}} ++ res |= __lsx_vbitrevi_w(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lsx_vbitrevi_w(_1, var); // expected-error {{argument to '__builtin_lsx_vbitrevi_w' must be a constant integer}} ++ return res; ++} ++ ++v2u64 vbitrevi_d(v2u64 _1, int var) { ++ v2u64 res = __lsx_vbitrevi_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= __lsx_vbitrevi_d(_1, 64); // expected-error {{argument value 64 is outside the valid range 0, 63}} ++ res |= __lsx_vbitrevi_d(_1, var); // expected-error {{argument to '__builtin_lsx_vbitrevi_d' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vaddi_bu(v16i8 _1, int var) { ++ v16i8 res = __lsx_vaddi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lsx_vaddi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lsx_vaddi_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vaddi_bu' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vaddi_hu(v8i16 _1, int var) { ++ v8i16 res = __lsx_vaddi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lsx_vaddi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lsx_vaddi_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vaddi_hu' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vaddi_wu(v4i32 _1, int var) { ++ v4i32 res = __lsx_vaddi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lsx_vaddi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lsx_vaddi_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vaddi_wu' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vaddi_du(v2i64 _1, int var) { ++ v2i64 res = __lsx_vaddi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lsx_vaddi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lsx_vaddi_du(_1, var); // expected-error {{argument to '__builtin_lsx_vaddi_du' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vsubi_bu(v16i8 _1, int var) { ++ v16i8 res = __lsx_vsubi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lsx_vsubi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lsx_vsubi_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vsubi_bu' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vsubi_hu(v8i16 _1, int var) { ++ v8i16 res = __lsx_vsubi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lsx_vsubi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lsx_vsubi_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vsubi_hu' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vsubi_wu(v4i32 _1, int var) { ++ v4i32 res = __lsx_vsubi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lsx_vsubi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lsx_vsubi_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vsubi_wu' must be a constant 
integer}} ++ return res; ++} ++ ++v2i64 vsubi_du(v2i64 _1, int var) { ++ v2i64 res = __lsx_vsubi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lsx_vsubi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lsx_vsubi_du(_1, var); // expected-error {{argument to '__builtin_lsx_vsubi_du' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vmaxi_b(v16i8 _1, int var) { ++ v16i8 res = __lsx_vmaxi_b(_1, -17); // expected-error {{argument value -17 is outside the valid range -16, 15}} ++ res |= __lsx_vmaxi_b(_1, 16); // expected-error {{argument value 16 is outside the valid range -16, 15}} ++ res |= __lsx_vmaxi_b(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vmaxi_h(v8i16 _1, int var) { ++ v8i16 res = __lsx_vmaxi_h(_1, -17); // expected-error {{argument value -17 is outside the valid range -16, 15}} ++ res |= __lsx_vmaxi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range -16, 15}} ++ res |= __lsx_vmaxi_h(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vmaxi_w(v4i32 _1, int var) { ++ v4i32 res = __lsx_vmaxi_w(_1, -17); // expected-error {{argument value -17 is outside the valid range -16, 15}} ++ res |= __lsx_vmaxi_w(_1, 16); // expected-error {{argument value 16 is outside the valid range -16, 15}} ++ res |= __lsx_vmaxi_w(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vmaxi_d(v2i64 _1, int var) { ++ v2i64 res = __lsx_vmaxi_d(_1, -17); // expected-error {{argument value -17 is outside the valid range -16, 15}} ++ res |= __lsx_vmaxi_d(_1, 16); // expected-error {{argument value 16 is outside the valid range -16, 15}} ++ res |= __lsx_vmaxi_d(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_d' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vmaxi_bu(v16u8 _1, int var) { ++ v16u8 res = __lsx_vmaxi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lsx_vmaxi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lsx_vmaxi_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_bu' must be a constant integer}} ++ return res; ++} ++ ++v8u16 vmaxi_hu(v8u16 _1, int var) { ++ v8u16 res = __lsx_vmaxi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lsx_vmaxi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lsx_vmaxi_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_hu' must be a constant integer}} ++ return res; ++} ++ ++v4u32 vmaxi_wu(v4u32 _1, int var) { ++ v4u32 res = __lsx_vmaxi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lsx_vmaxi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lsx_vmaxi_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_wu' must be a constant integer}} ++ return res; ++} ++ ++v2u64 vmaxi_du(v2u64 _1, int var) { ++ v2u64 res = __lsx_vmaxi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lsx_vmaxi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= 
__lsx_vmaxi_du(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_du' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vmini_b(v16i8 _1, int var) { ++ v16i8 res = __lsx_vmini_b(_1, -17); // expected-error {{argument value -17 is outside the valid range -16, 15}} ++ res |= __lsx_vmini_b(_1, 16); // expected-error {{argument value 16 is outside the valid range -16, 15}} ++ res |= __lsx_vmini_b(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vmini_h(v8i16 _1, int var) { ++ v8i16 res = __lsx_vmini_h(_1, -17); // expected-error {{argument value -17 is outside the valid range -16, 15}} ++ res |= __lsx_vmini_h(_1, 16); // expected-error {{argument value 16 is outside the valid range -16, 15}} ++ res |= __lsx_vmini_h(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vmini_w(v4i32 _1, int var) { ++ v4i32 res = __lsx_vmini_w(_1, -17); // expected-error {{argument value -17 is outside the valid range -16, 15}} ++ res |= __lsx_vmini_w(_1, 16); // expected-error {{argument value 16 is outside the valid range -16, 15}} ++ res |= __lsx_vmini_w(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vmini_d(v2i64 _1, int var) { ++ v2i64 res = __lsx_vmini_d(_1, -17); // expected-error {{argument value -17 is outside the valid range -16, 15}} ++ res |= __lsx_vmini_d(_1, 16); // expected-error {{argument value 16 is outside the valid range -16, 15}} ++ res |= __lsx_vmini_d(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_d' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vmini_bu(v16u8 _1, int var) { ++ v16u8 res = __lsx_vmini_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lsx_vmini_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lsx_vmini_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_bu' must be a constant integer}} ++ return res; ++} ++ ++v8u16 vmini_hu(v8u16 _1, int var) { ++ v8u16 res = __lsx_vmini_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lsx_vmini_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lsx_vmini_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_hu' must be a constant integer}} ++ return res; ++} ++ ++v4u32 vmini_wu(v4u32 _1, int var) { ++ v4u32 res = __lsx_vmini_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lsx_vmini_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lsx_vmini_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_wu' must be a constant integer}} ++ return res; ++} ++ ++v2u64 vmini_du(v2u64 _1, int var) { ++ v2u64 res = __lsx_vmini_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lsx_vmini_du(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lsx_vmini_du(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_du' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vseqi_b(v16i8 _1, int var) { ++ v16i8 res = __lsx_vseqi_b(_1, -17); // expected-error {{argument value -17 is outside the valid range -16, 15}} ++ res |= __lsx_vseqi_b(_1, 16);
// expected-error {{argument value 16 is outside the valid range -16, 15}} ++ res |= __lsx_vseqi_b(_1, var); // expected-error {{argument to '__builtin_lsx_vseqi_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vseqi_h(v8i16 _1, int var) { ++ v8i16 res = __lsx_vseqi_h(_1, -17); // expected-error {{argument value -17 is outside the valid range -16, 15}} ++ res |= __lsx_vseqi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range -16, 15}} ++ res |= __lsx_vseqi_h(_1, var); // expected-error {{argument to '__builtin_lsx_vseqi_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vseqi_w(v4i32 _1, int var) { ++ v4i32 res = __lsx_vseqi_w(_1, -17); // expected-error {{argument value -17 is outside the valid range -16, 15}} ++ res |= __lsx_vseqi_w(_1, 16); // expected-error {{argument value 16 is outside the valid range -16, 15}} ++ res |= __lsx_vseqi_w(_1, var); // expected-error {{argument to '__builtin_lsx_vseqi_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vseqi_d(v2i64 _1, int var) { ++ v2i64 res = __lsx_vseqi_d(_1, -17); // expected-error {{argument value -17 is outside the valid range -16, 15}} ++ res |= __lsx_vseqi_d(_1, 16); // expected-error {{argument value 16 is outside the valid range -16, 15}} ++ res |= __lsx_vseqi_d(_1, var); // expected-error {{argument to '__builtin_lsx_vseqi_d' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vslti_b(v16i8 _1, int var) { ++ v16i8 res = __lsx_vslti_b(_1, -17); // expected-error {{argument value -17 is outside the valid range -16, 15}} ++ res |= __lsx_vslti_b(_1, 16); // expected-error {{argument value 16 is outside the valid range -16, 15}} ++ res |= __lsx_vslti_b(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vslti_h(v8i16 _1, int var) { ++ v8i16 res = __lsx_vslti_h(_1, -17); // expected-error {{argument value -17 is outside the valid range -16, 15}} ++ res |= __lsx_vslti_h(_1, 16); // expected-error {{argument value 16 is outside the valid range -16, 15}} ++ res |= __lsx_vslti_h(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vslti_w(v4i32 _1, int var) { ++ v4i32 res = __lsx_vslti_w(_1, -17); // expected-error {{argument value -17 is outside the valid range -16, 15}} ++ res |= __lsx_vslti_w(_1, 16); // expected-error {{argument value 16 is outside the valid range -16, 15}} ++ res |= __lsx_vslti_w(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vslti_d(v2i64 _1, int var) { ++ v2i64 res = __lsx_vslti_d(_1, -17); // expected-error {{argument value -17 is outside the valid range -16, 15}} ++ res |= __lsx_vslti_d(_1, 16); // expected-error {{argument value 16 is outside the valid range -16, 15}} ++ res |= __lsx_vslti_d(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_d' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vslti_bu(v16u8 _1, int var) { ++ v16i8 res = __lsx_vslti_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lsx_vslti_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lsx_vslti_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_bu' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vslti_hu(v8u16 _1, int var) { ++ v8i16 res = __lsx_vslti_hu(_1, -1); // expected-error {{argument value 4294967295 
is outside the valid range 0, 31}} ++ res |= __lsx_vslti_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lsx_vslti_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_hu' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vslti_wu(v4u32 _1, int var) { ++ v4i32 res = __lsx_vslti_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lsx_vslti_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lsx_vslti_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_wu' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vslti_du(v2u64 _1, int var) { ++ v2i64 res = __lsx_vslti_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lsx_vslti_du(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lsx_vslti_du(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_du' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vslei_b(v16i8 _1, int var) { ++ v16i8 res = __lsx_vslei_b(_1, -17); // expected-error {{argument value -17 is outside the valid range -16, 15}} ++ res |= __lsx_vslei_b(_1, 16); // expected-error {{argument value 16 is outside the valid range -16, 15}} ++ res |= __lsx_vslei_b(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vslei_h(v8i16 _1, int var) { ++ v8i16 res = __lsx_vslei_h(_1, -17); // expected-error {{argument value -17 is outside the valid range -16, 15}} ++ res |= __lsx_vslei_h(_1, 16); // expected-error {{argument value 16 is outside the valid range -16, 15}} ++ res |= __lsx_vslei_h(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vslei_w(v4i32 _1, int var) { ++ v4i32 res = __lsx_vslei_w(_1, -17); // expected-error {{argument value -17 is outside the valid range -16, 15}} ++ res |= __lsx_vslei_w(_1, 16); // expected-error {{argument value 16 is outside the valid range -16, 15}} ++ res |= __lsx_vslei_w(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vslei_d(v2i64 _1, int var) { ++ v2i64 res = __lsx_vslei_d(_1, -17); // expected-error {{argument value -17 is outside the valid range -16, 15}} ++ res |= __lsx_vslei_d(_1, 16); // expected-error {{argument value 16 is outside the valid range -16, 15}} ++ res |= __lsx_vslei_d(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_d' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vslei_bu(v16u8 _1, int var) { ++ v16i8 res = __lsx_vslei_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lsx_vslei_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lsx_vslei_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_bu' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vslei_hu(v8u16 _1, int var) { ++ v8i16 res = __lsx_vslei_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lsx_vslei_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lsx_vslei_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_hu' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vslei_wu(v4u32 _1, int 
var) { ++ v4i32 res = __lsx_vslei_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lsx_vslei_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lsx_vslei_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_wu' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vslei_du(v2u64 _1, int var) { ++ v2i64 res = __lsx_vslei_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lsx_vslei_du(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lsx_vslei_du(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_du' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vsat_b(v16i8 _1, int var) { ++ v16i8 res = __lsx_vsat_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 7}} ++ res |= __lsx_vsat_b(_1, 8); // expected-error {{argument value 8 is outside the valid range 0, 7}} ++ res |= __lsx_vsat_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vsat_h(v8i16 _1, int var) { ++ v8i16 res = __lsx_vsat_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __lsx_vsat_h(_1, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __lsx_vsat_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vsat_w(v4i32 _1, int var) { ++ v4i32 res = __lsx_vsat_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lsx_vsat_w(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lsx_vsat_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vsat_d(v2i64 _1, int var) { ++ v2i64 res = __lsx_vsat_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= __lsx_vsat_d(_1, 64); // expected-error {{argument value 64 is outside the valid range 0, 63}} ++ res |= __lsx_vsat_d(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_d' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vsat_bu(v16u8 _1, int var) { ++ v16u8 res = __lsx_vsat_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 7}} ++ res |= __lsx_vsat_bu(_1, 8); // expected-error {{argument value 8 is outside the valid range 0, 7}} ++ res |= __lsx_vsat_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_bu' must be a constant integer}} ++ return res; ++} ++ ++v8u16 vsat_hu(v8u16 _1, int var) { ++ v8u16 res = __lsx_vsat_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __lsx_vsat_hu(_1, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __lsx_vsat_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_hu' must be a constant integer}} ++ return res; ++} ++ ++v4u32 vsat_wu(v4u32 _1, int var) { ++ v4u32 res = __lsx_vsat_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lsx_vsat_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lsx_vsat_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_wu' must be a constant 
integer}} ++ return res; ++} ++ ++v2u64 vsat_du(v2u64 _1, int var) { ++ v2u64 res = __lsx_vsat_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= __lsx_vsat_du(_1, 64); // expected-error {{argument value 64 is outside the valid range 0, 63}} ++ res |= __lsx_vsat_du(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_du' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vreplvei_b(v16i8 _1, int var) { ++ v16i8 res = __lsx_vreplvei_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __lsx_vreplvei_b(_1, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __lsx_vreplvei_b(_1, var); // expected-error {{argument to '__builtin_lsx_vreplvei_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vreplvei_h(v8i16 _1, int var) { ++ v8i16 res = __lsx_vreplvei_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 7}} ++ res |= __lsx_vreplvei_h(_1, 8); // expected-error {{argument value 8 is outside the valid range 0, 7}} ++ res |= __lsx_vreplvei_h(_1, var); // expected-error {{argument to '__builtin_lsx_vreplvei_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vreplvei_w(v4i32 _1, int var) { ++ v4i32 res = __lsx_vreplvei_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 3}} ++ res |= __lsx_vreplvei_w(_1, 4); // expected-error {{argument value 4 is outside the valid range 0, 3}} ++ res |= __lsx_vreplvei_w(_1, var); // expected-error {{argument to '__builtin_lsx_vreplvei_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vreplvei_d(v2i64 _1, int var) { ++ v2i64 res = __lsx_vreplvei_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 1}} ++ res |= __lsx_vreplvei_d(_1, 2); // expected-error {{argument value 2 is outside the valid range 0, 1}} ++ res |= __lsx_vreplvei_d(_1, var); // expected-error {{argument to '__builtin_lsx_vreplvei_d' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vandi_b(v16u8 _1, int var) { ++ v16u8 res = __lsx_vandi_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 255}} ++ res |= __lsx_vandi_b(_1, 256); // expected-error {{argument value 256 is outside the valid range 0, 255}} ++ res |= __lsx_vandi_b(_1, var); // expected-error {{argument to '__builtin_lsx_vandi_b' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vori_b(v16u8 _1, int var) { ++ v16u8 res = __lsx_vori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 255}} ++ res |= __lsx_vori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range 0, 255}} ++ res |= __lsx_vori_b(_1, var); // expected-error {{argument to '__builtin_lsx_vori_b' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vnori_b(v16u8 _1, int var) { ++ v16u8 res = __lsx_vnori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 255}} ++ res |= __lsx_vnori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range 0, 255}} ++ res |= __lsx_vnori_b(_1, var); // expected-error {{argument to '__builtin_lsx_vnori_b' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vxori_b(v16u8 _1, int var) { ++ v16u8 res = __lsx_vxori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 255}} ++ res |= __lsx_vxori_b(_1, 256); // expected-error {{argument value 256 is outside 
the valid range 0, 255}} ++ res |= __lsx_vxori_b(_1, var); // expected-error {{argument to '__builtin_lsx_vxori_b' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vbitseli_b(v16u8 _1, v16u8 _2, int var) { ++ v16u8 res = __lsx_vbitseli_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 255}} ++ res |= __lsx_vbitseli_b(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range 0, 255}} ++ res |= __lsx_vbitseli_b(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vbitseli_b' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vshuf4i_b(v16i8 _1, int var) { ++ v16i8 res = __lsx_vshuf4i_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 255}} ++ res |= __lsx_vshuf4i_b(_1, 256); // expected-error {{argument value 256 is outside the valid range 0, 255}} ++ res |= __lsx_vshuf4i_b(_1, var); // expected-error {{argument to '__builtin_lsx_vshuf4i_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vshuf4i_h(v8i16 _1, int var) { ++ v8i16 res = __lsx_vshuf4i_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 255}} ++ res |= __lsx_vshuf4i_h(_1, 256); // expected-error {{argument value 256 is outside the valid range 0, 255}} ++ res |= __lsx_vshuf4i_h(_1, var); // expected-error {{argument to '__builtin_lsx_vshuf4i_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vshuf4i_w(v4i32 _1, int var) { ++ v4i32 res = __lsx_vshuf4i_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 255}} ++ res |= __lsx_vshuf4i_w(_1, 256); // expected-error {{argument value 256 is outside the valid range 0, 255}} ++ res |= __lsx_vshuf4i_w(_1, var); // expected-error {{argument to '__builtin_lsx_vshuf4i_w' must be a constant integer}} ++ return res; ++} ++ ++int vpickve2gr_b(v16i8 _1, int var) { ++ int res = __lsx_vpickve2gr_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __lsx_vpickve2gr_b(_1, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __lsx_vpickve2gr_b(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_b' must be a constant integer}} ++ return res; ++} ++ ++int vpickve2gr_h(v8i16 _1, int var) { ++ int res = __lsx_vpickve2gr_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 7}} ++ res |= __lsx_vpickve2gr_h(_1, 8); // expected-error {{argument value 8 is outside the valid range 0, 7}} ++ res |= __lsx_vpickve2gr_h(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_h' must be a constant integer}} ++ return res; ++} ++ ++int vpickve2gr_w(v4i32 _1, int var) { ++ int res = __lsx_vpickve2gr_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 3}} ++ res |= __lsx_vpickve2gr_w(_1, 4); // expected-error {{argument value 4 is outside the valid range 0, 3}} ++ res |= __lsx_vpickve2gr_w(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_w' must be a constant integer}} ++ return res; ++} ++ ++long vpickve2gr_d(v2i64 _1, int var) { ++ long res = __lsx_vpickve2gr_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 1}} ++ res |= __lsx_vpickve2gr_d(_1, 2); // expected-error {{argument value 2 is outside the valid range 0, 1}} ++ res |= __lsx_vpickve2gr_d(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_d' must be a constant integer}} ++ return res; ++} ++ 
++unsigned int vpickve2gr_bu(v16i8 _1, int var) { ++ unsigned int res = __lsx_vpickve2gr_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __lsx_vpickve2gr_bu(_1, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __lsx_vpickve2gr_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_bu' must be a constant integer}} ++ return res; ++} ++ ++unsigned int vpickve2gr_hu(v8i16 _1, int var) { ++ unsigned int res = __lsx_vpickve2gr_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 7}} ++ res |= __lsx_vpickve2gr_hu(_1, 8); // expected-error {{argument value 8 is outside the valid range 0, 7}} ++ res |= __lsx_vpickve2gr_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_hu' must be a constant integer}} ++ return res; ++} ++ ++unsigned int vpickve2gr_wu(v4i32 _1, int var) { ++ unsigned int res = __lsx_vpickve2gr_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 3}} ++ res |= __lsx_vpickve2gr_wu(_1, 4); // expected-error {{argument value 4 is outside the valid range 0, 3}} ++ res |= __lsx_vpickve2gr_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_wu' must be a constant integer}} ++ return res; ++} ++ ++unsigned long int vpickve2gr_du(v2i64 _1, int var) { ++ unsigned long int res = __lsx_vpickve2gr_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 1}} ++ res |= __lsx_vpickve2gr_du(_1, 2); // expected-error {{argument value 2 is outside the valid range 0, 1}} ++ res |= __lsx_vpickve2gr_du(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_du' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vinsgr2vr_b(v16i8 _1, int var) { ++ v16i8 res = __lsx_vinsgr2vr_b(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __lsx_vinsgr2vr_b(_1, 1, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __lsx_vinsgr2vr_b(_1, 1, var); // expected-error {{argument to '__builtin_lsx_vinsgr2vr_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vinsgr2vr_h(v8i16 _1, int var) { ++ v8i16 res = __lsx_vinsgr2vr_h(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 7}} ++ res |= __lsx_vinsgr2vr_h(_1, 1, 8); // expected-error {{argument value 8 is outside the valid range 0, 7}} ++ res |= __lsx_vinsgr2vr_h(_1, 1, var); // expected-error {{argument to '__builtin_lsx_vinsgr2vr_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vinsgr2vr_w(v4i32 _1, int var) { ++ v4i32 res = __lsx_vinsgr2vr_w(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 3}} ++ res |= __lsx_vinsgr2vr_w(_1, 1, 4); // expected-error {{argument value 4 is outside the valid range 0, 3}} ++ res |= __lsx_vinsgr2vr_w(_1, 1, var); // expected-error {{argument to '__builtin_lsx_vinsgr2vr_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vinsgr2vr_d(v2i64 _1, int var) { ++ v2i64 res = __lsx_vinsgr2vr_d(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 1}} ++ res |= __lsx_vinsgr2vr_d(_1, 1, 2); // expected-error {{argument value 2 is outside the valid range 0, 1}} ++ res |= __lsx_vinsgr2vr_d(_1, 1, var); // expected-error {{argument to '__builtin_lsx_vinsgr2vr_d' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vsllwil_h_b(v16i8 _1, int var) { ++ v8i16 
res = __lsx_vsllwil_h_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 7}} ++ res |= __lsx_vsllwil_h_b(_1, 8); // expected-error {{argument value 8 is outside the valid range 0, 7}} ++ res |= __lsx_vsllwil_h_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_h_b' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vsllwil_w_h(v8i16 _1, int var) { ++ v4i32 res = __lsx_vsllwil_w_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __lsx_vsllwil_w_h(_1, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __lsx_vsllwil_w_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_w_h' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vsllwil_d_w(v4i32 _1, int var) { ++ v2i64 res = __lsx_vsllwil_d_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lsx_vsllwil_d_w(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lsx_vsllwil_d_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_d_w' must be a constant integer}} ++ return res; ++} ++ ++v8u16 vsllwil_hu_bu(v16u8 _1, int var) { ++ v8u16 res = __lsx_vsllwil_hu_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 7}} ++ res |= __lsx_vsllwil_hu_bu(_1, 8); // expected-error {{argument value 8 is outside the valid range 0, 7}} ++ res |= __lsx_vsllwil_hu_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_hu_bu' must be a constant integer}} ++ return res; ++} ++ ++v4u32 vsllwil_wu_hu(v8u16 _1, int var) { ++ v4u32 res = __lsx_vsllwil_wu_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __lsx_vsllwil_wu_hu(_1, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __lsx_vsllwil_wu_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_wu_hu' must be a constant integer}} ++ return res; ++} ++ ++v2u64 vsllwil_du_wu(v4u32 _1, int var) { ++ v2u64 res = __lsx_vsllwil_du_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lsx_vsllwil_du_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lsx_vsllwil_du_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_du_wu' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vfrstpi_b(v16i8 _1, v16i8 _2, int var) { ++ v16i8 res = __lsx_vfrstpi_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lsx_vfrstpi_b(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lsx_vfrstpi_b(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vfrstpi_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vfrstpi_h(v8i16 _1, v8i16 _2, int var) { ++ v8i16 res = __lsx_vfrstpi_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lsx_vfrstpi_h(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lsx_vfrstpi_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vfrstpi_h' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vshuf4i_d(v2i64 _1, v2i64 _2, int var) { ++ v2i64 res = __lsx_vshuf4i_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 255}} 
++ res |= __lsx_vshuf4i_d(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range 0, 255}} ++ res |= __lsx_vshuf4i_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vshuf4i_d' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vbsrl_v(v16i8 _1, int var) { ++ v16i8 res = __lsx_vbsrl_v(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lsx_vbsrl_v(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lsx_vbsrl_v(_1, var); // expected-error {{argument to '__builtin_lsx_vbsrl_v' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vbsll_v(v16i8 _1, int var) { ++ v16i8 res = __lsx_vbsll_v(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lsx_vbsll_v(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lsx_vbsll_v(_1, var); // expected-error {{argument to '__builtin_lsx_vbsll_v' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vextrins_b(v16i8 _1, v16i8 _2, int var) { ++ v16i8 res = __lsx_vextrins_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 255}} ++ res |= __lsx_vextrins_b(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range 0, 255}} ++ res |= __lsx_vextrins_b(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vextrins_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vextrins_h(v8i16 _1, v8i16 _2, int var) { ++ v8i16 res = __lsx_vextrins_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 255}} ++ res |= __lsx_vextrins_h(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range 0, 255}} ++ res |= __lsx_vextrins_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vextrins_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vextrins_w(v4i32 _1, v4i32 _2, int var) { ++ v4i32 res = __lsx_vextrins_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 255}} ++ res |= __lsx_vextrins_w(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range 0, 255}} ++ res |= __lsx_vextrins_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vextrins_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vextrins_d(v2i64 _1, v2i64 _2, int var) { ++ v2i64 res = __lsx_vextrins_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 255}} ++ res |= __lsx_vextrins_d(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range 0, 255}} ++ res |= __lsx_vextrins_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vextrins_d' must be a constant integer}} ++ return res; ++} ++ ++void vstelm_b_idx(v16i8 _1, void *_2, int var) { ++ __lsx_vstelm_b(_1, _2, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ __lsx_vstelm_b(_1, _2, 1, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ __lsx_vstelm_b(_1, _2, 1, var); // expected-error {{argument to '__builtin_lsx_vstelm_b' must be a constant integer}} ++} ++ ++void vstelm_h_idx(v8i16 _1, void *_2, int var) { ++ __lsx_vstelm_h(_1, _2, 2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 7}} ++ __lsx_vstelm_h(_1, _2, 2, 8); // expected-error {{argument value 8 is outside the valid range 0, 7}} ++ __lsx_vstelm_h(_1, _2, 2, var); // 
expected-error {{argument to '__builtin_lsx_vstelm_h' must be a constant integer}} ++} ++ ++void vstelm_w_idx(v4i32 _1, void *_2, int var) { ++ __lsx_vstelm_w(_1, _2, 4, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 3}} ++ __lsx_vstelm_w(_1, _2, 4, 4); // expected-error {{argument value 4 is outside the valid range 0, 3}} ++ __lsx_vstelm_w(_1, _2, 4, var); // expected-error {{argument to '__builtin_lsx_vstelm_w' must be a constant integer}} ++} ++ ++void vstelm_d_idx(v2i64 _1, void *_2, int var) { ++ __lsx_vstelm_d(_1, _2, 8, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 1}} ++ __lsx_vstelm_d(_1, _2, 8, 2); // expected-error {{argument value 2 is outside the valid range 0, 1}} ++ __lsx_vstelm_d(_1, _2, 8, var); // expected-error {{argument to '__builtin_lsx_vstelm_d' must be a constant integer}} ++} ++ ++void vstelm_b(v16i8 _1, void *_2, int var) { ++ __lsx_vstelm_b(_1, _2, -129, 1); // expected-error {{argument value -129 is outside the valid range -128, 127}} ++ __lsx_vstelm_b(_1, _2, 128, 1); // expected-error {{argument value 128 is outside the valid range -128, 127}} ++ __lsx_vstelm_b(_1, _2, var, 1); // expected-error {{argument to '__builtin_lsx_vstelm_b' must be a constant integer}} ++} ++ ++void vstelm_h(v8i16 _1, void *_2, int var) { ++ __lsx_vstelm_h(_1, _2, -258, 1); // expected-error {{argument value -258 is outside the valid range -256, 254}} ++ __lsx_vstelm_h(_1, _2, 256, 1); // expected-error {{argument value 256 is outside the valid range -256, 254}} ++ __lsx_vstelm_h(_1, _2, var, 1); // expected-error {{argument to '__builtin_lsx_vstelm_h' must be a constant integer}} ++} ++ ++void vstelm_w(v4i32 _1, void *_2, int var) { ++ __lsx_vstelm_w(_1, _2, -516, 1); // expected-error {{argument value -516 is outside the valid range -512, 508}} ++ __lsx_vstelm_w(_1, _2, 512, 1); // expected-error {{argument value 512 is outside the valid range -512, 508}} ++ __lsx_vstelm_w(_1, _2, var, 1); // expected-error {{argument to '__builtin_lsx_vstelm_w' must be a constant integer}} ++} ++ ++void vstelm_d(v2i64 _1, void *_2, int var) { ++ __lsx_vstelm_d(_1, _2, -1032, 1); // expected-error {{argument value -1032 is outside the valid range -1024, 1016}} ++ __lsx_vstelm_d(_1, _2, 1024, 1); // expected-error {{argument value 1024 is outside the valid range -1024, 1016}} ++ __lsx_vstelm_d(_1, _2, var, 1); // expected-error {{argument to '__builtin_lsx_vstelm_d' must be a constant integer}} ++} ++ ++v16i8 vldrepl_b(void *_1, int var) { ++ v16i8 res = __lsx_vldrepl_b(_1, -2049); // expected-error {{argument value -2049 is outside the valid range -2048, 2047}} ++ res |= __lsx_vldrepl_b(_1, 2048); // expected-error {{argument value 2048 is outside the valid range -2048, 2047}} ++ res |= __lsx_vldrepl_b(_1, var); // expected-error {{argument to '__builtin_lsx_vldrepl_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vldrepl_h(void *_1, int var) { ++ v8i16 res = __lsx_vldrepl_h(_1, -2050); // expected-error {{argument value -2050 is outside the valid range -2048, 2046}} ++ res |= __lsx_vldrepl_h(_1, 2048); // expected-error {{argument value 2048 is outside the valid range -2048, 2046}} ++ res |= __lsx_vldrepl_h(_1, var); // expected-error {{argument to '__builtin_lsx_vldrepl_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vldrepl_w(void *_1, int var) { ++ v4i32 res = __lsx_vldrepl_w(_1, -2052); // expected-error {{argument value -2052 is outside the valid range -2048, 2044}} ++ res |= __lsx_vldrepl_w(_1, 
2048); // expected-error {{argument value 2048 is outside the valid range -2048, 2044}} ++ res |= __lsx_vldrepl_w(_1, var); // expected-error {{argument to '__builtin_lsx_vldrepl_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vldrepl_d(void *_1, int var) { ++ v2i64 res = __lsx_vldrepl_d(_1, -2056); // expected-error {{argument value -2056 is outside the valid range -2048, 2040}} ++ res |= __lsx_vldrepl_d(_1, 2048); // expected-error {{argument value 2048 is outside the valid range -2048, 2040}} ++ res |= __lsx_vldrepl_d(_1, var); // expected-error {{argument to '__builtin_lsx_vldrepl_d' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vrotri_b(v16i8 _1, int var) { ++ v16i8 res = __lsx_vrotri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 7}} ++ res |= __lsx_vrotri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range 0, 7}} ++ res |= __lsx_vrotri_b(_1, var); // expected-error {{argument to '__builtin_lsx_vrotri_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vrotri_h(v8i16 _1, int var) { ++ v8i16 res = __lsx_vrotri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __lsx_vrotri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __lsx_vrotri_h(_1, var); // expected-error {{argument to '__builtin_lsx_vrotri_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vrotri_w(v4i32 _1, int var) { ++ v4i32 res = __lsx_vrotri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lsx_vrotri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lsx_vrotri_w(_1, var); // expected-error {{argument to '__builtin_lsx_vrotri_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vrotri_d(v2i64 _1, int var) { ++ v2i64 res = __lsx_vrotri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= __lsx_vrotri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range 0, 63}} ++ res |= __lsx_vrotri_d(_1, var); // expected-error {{argument to '__builtin_lsx_vrotri_d' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vsrlni_b_h(v16i8 _1, v16i8 _2, int var) { ++ v16i8 res = __lsx_vsrlni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __lsx_vsrlni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __lsx_vsrlni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlni_b_h' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vsrlni_h_w(v8i16 _1, v8i16 _2, int var) { ++ v8i16 res = __lsx_vsrlni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lsx_vsrlni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lsx_vsrlni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlni_h_w' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vsrlni_w_d(v4i32 _1, v4i32 _2, int var) { ++ v4i32 res = __lsx_vsrlni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= __lsx_vsrlni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range 0, 63}} ++ res |= __lsx_vsrlni_w_d(_1, _2, var); // expected-error {{argument to 
'__builtin_lsx_vsrlni_w_d' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vsrlni_d_q(v2i64 _1, v2i64 _2, int var) { ++ v2i64 res = __lsx_vsrlni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 127}} ++ res |= __lsx_vsrlni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range 0, 127}} ++ res |= __lsx_vsrlni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlni_d_q' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vsrlrni_b_h(v16i8 _1, v16i8 _2, int var) { ++ v16i8 res = __lsx_vsrlrni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __lsx_vsrlrni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __lsx_vsrlrni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlrni_b_h' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vsrlrni_h_w(v8i16 _1, v8i16 _2, int var) { ++ v8i16 res = __lsx_vsrlrni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lsx_vsrlrni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lsx_vsrlrni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlrni_h_w' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vsrlrni_w_d(v4i32 _1, v4i32 _2, int var) { ++ v4i32 res = __lsx_vsrlrni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= __lsx_vsrlrni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range 0, 63}} ++ res |= __lsx_vsrlrni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlrni_w_d' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vsrlrni_d_q(v2i64 _1, v2i64 _2, int var) { ++ v2i64 res = __lsx_vsrlrni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 127}} ++ res |= __lsx_vsrlrni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range 0, 127}} ++ res |= __lsx_vsrlrni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlrni_d_q' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vssrlni_b_h(v16i8 _1, v16i8 _2, int var) { ++ v16i8 res = __lsx_vssrlni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __lsx_vssrlni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __lsx_vssrlni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_b_h' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vssrlni_h_w(v8i16 _1, v8i16 _2, int var) { ++ v8i16 res = __lsx_vssrlni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lsx_vssrlni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lsx_vssrlni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_h_w' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vssrlni_w_d(v4i32 _1, v4i32 _2, int var) { ++ v4i32 res = __lsx_vssrlni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= __lsx_vssrlni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range 0, 63}} ++ res |= __lsx_vssrlni_w_d(_1, _2, var); // expected-error 
{{argument to '__builtin_lsx_vssrlni_w_d' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vssrlni_d_q(v2i64 _1, v2i64 _2, int var) { ++ v2i64 res = __lsx_vssrlni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 127}} ++ res |= __lsx_vssrlni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range 0, 127}} ++ res |= __lsx_vssrlni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_d_q' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vssrlni_bu_h(v16u8 _1, v16i8 _2, int var) { ++ v16u8 res = __lsx_vssrlni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __lsx_vssrlni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __lsx_vssrlni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_bu_h' must be a constant integer}} ++ return res; ++} ++ ++v8u16 vssrlni_hu_w(v8u16 _1, v8i16 _2, int var) { ++ v8u16 res = __lsx_vssrlni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lsx_vssrlni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lsx_vssrlni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_hu_w' must be a constant integer}} ++ return res; ++} ++ ++v4u32 vssrlni_wu_d(v4u32 _1, v4i32 _2, int var) { ++ v4u32 res = __lsx_vssrlni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= __lsx_vssrlni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range 0, 63}} ++ res |= __lsx_vssrlni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_wu_d' must be a constant integer}} ++ return res; ++} ++ ++v2u64 vssrlni_du_q(v2u64 _1, v2i64 _2, int var) { ++ v2u64 res = __lsx_vssrlni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 127}} ++ res |= __lsx_vssrlni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range 0, 127}} ++ res |= __lsx_vssrlni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_du_q' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vssrlrni_b_h(v16i8 _1, v16i8 _2, int var) { ++ v16i8 res = __lsx_vssrlrni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __lsx_vssrlrni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __lsx_vssrlrni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_b_h' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vssrlrni_h_w(v8i16 _1, v8i16 _2, int var) { ++ v8i16 res = __lsx_vssrlrni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lsx_vssrlrni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lsx_vssrlrni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_h_w' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vssrlrni_w_d(v4i32 _1, v4i32 _2, int var) { ++ v4i32 res = __lsx_vssrlrni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= __lsx_vssrlrni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range 0, 63}} ++ res |= 
__lsx_vssrlrni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_w_d' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vssrlrni_d_q(v2i64 _1, v2i64 _2, int var) { ++ v2i64 res = __lsx_vssrlrni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 127}} ++ res |= __lsx_vssrlrni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range 0, 127}} ++ res |= __lsx_vssrlrni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_d_q' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vssrlrni_bu_h(v16u8 _1, v16i8 _2, int var) { ++ v16u8 res = __lsx_vssrlrni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __lsx_vssrlrni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __lsx_vssrlrni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_bu_h' must be a constant integer}} ++ return res; ++} ++ ++v8u16 vssrlrni_hu_w(v8u16 _1, v8i16 _2, int var) { ++ v8u16 res = __lsx_vssrlrni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lsx_vssrlrni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lsx_vssrlrni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_hu_w' must be a constant integer}} ++ return res; ++} ++ ++v4u32 vssrlrni_wu_d(v4u32 _1, v4i32 _2, int var) { ++ v4u32 res = __lsx_vssrlrni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= __lsx_vssrlrni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range 0, 63}} ++ res |= __lsx_vssrlrni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_wu_d' must be a constant integer}} ++ return res; ++} ++ ++v2u64 vssrlrni_du_q(v2u64 _1, v2i64 _2, int var) { ++ v2u64 res = __lsx_vssrlrni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 127}} ++ res |= __lsx_vssrlrni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range 0, 127}} ++ res |= __lsx_vssrlrni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_du_q' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vsrani_b_h(v16i8 _1, v16i8 _2, int var) { ++ v16i8 res = __lsx_vsrani_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __lsx_vsrani_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __lsx_vsrani_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrani_b_h' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vsrani_h_w(v8i16 _1, v8i16 _2, int var) { ++ v8i16 res = __lsx_vsrani_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lsx_vsrani_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lsx_vsrani_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrani_h_w' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vsrani_w_d(v4i32 _1, v4i32 _2, int var) { ++ v4i32 res = __lsx_vsrani_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= __lsx_vsrani_w_d(_1, _2, 64); // expected-error {{argument value 
64 is outside the valid range 0, 63}} ++ res |= __lsx_vsrani_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrani_w_d' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vsrani_d_q(v2i64 _1, v2i64 _2, int var) { ++ v2i64 res = __lsx_vsrani_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 127}} ++ res |= __lsx_vsrani_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range 0, 127}} ++ res |= __lsx_vsrani_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrani_d_q' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vsrarni_b_h(v16i8 _1, v16i8 _2, int var) { ++ v16i8 res = __lsx_vsrarni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __lsx_vsrarni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __lsx_vsrarni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrarni_b_h' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vsrarni_h_w(v8i16 _1, v8i16 _2, int var) { ++ v8i16 res = __lsx_vsrarni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lsx_vsrarni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lsx_vsrarni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrarni_h_w' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vsrarni_w_d(v4i32 _1, v4i32 _2, int var) { ++ v4i32 res = __lsx_vsrarni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= __lsx_vsrarni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range 0, 63}} ++ res |= __lsx_vsrarni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrarni_w_d' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vsrarni_d_q(v2i64 _1, v2i64 _2, int var) { ++ v2i64 res = __lsx_vsrarni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 127}} ++ res |= __lsx_vsrarni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range 0, 127}} ++ res |= __lsx_vsrarni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrarni_d_q' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vssrani_b_h(v16i8 _1, v16i8 _2, int var) { ++ v16i8 res = __lsx_vssrani_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __lsx_vssrani_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __lsx_vssrani_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_b_h' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vssrani_h_w(v8i16 _1, v8i16 _2, int var) { ++ v8i16 res = __lsx_vssrani_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lsx_vssrani_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lsx_vssrani_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_h_w' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vssrani_w_d(v4i32 _1, v4i32 _2, int var) { ++ v4i32 res = __lsx_vssrani_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= __lsx_vssrani_w_d(_1, _2, 64); // expected-error {{argument 
value 64 is outside the valid range 0, 63}} ++ res |= __lsx_vssrani_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_w_d' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vssrani_d_q(v2i64 _1, v2i64 _2, int var) { ++ v2i64 res = __lsx_vssrani_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 127}} ++ res |= __lsx_vssrani_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range 0, 127}} ++ res |= __lsx_vssrani_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_d_q' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vssrani_bu_h(v16u8 _1, v16i8 _2, int var) { ++ v16u8 res = __lsx_vssrani_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __lsx_vssrani_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __lsx_vssrani_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_bu_h' must be a constant integer}} ++ return res; ++} ++ ++v8u16 vssrani_hu_w(v8u16 _1, v8i16 _2, int var) { ++ v8u16 res = __lsx_vssrani_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lsx_vssrani_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lsx_vssrani_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_hu_w' must be a constant integer}} ++ return res; ++} ++ ++v4u32 vssrani_wu_d(v4u32 _1, v4i32 _2, int var) { ++ v4u32 res = __lsx_vssrani_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= __lsx_vssrani_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range 0, 63}} ++ res |= __lsx_vssrani_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_wu_d' must be a constant integer}} ++ return res; ++} ++ ++v2u64 vssrani_du_q(v2u64 _1, v2i64 _2, int var) { ++ v2u64 res = __lsx_vssrani_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 127}} ++ res |= __lsx_vssrani_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range 0, 127}} ++ res |= __lsx_vssrani_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_du_q' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vssrarni_b_h(v16i8 _1, v16i8 _2, int var) { ++ v16i8 res = __lsx_vssrarni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __lsx_vssrarni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __lsx_vssrarni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_b_h' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vssrarni_h_w(v8i16 _1, v8i16 _2, int var) { ++ v8i16 res = __lsx_vssrarni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lsx_vssrarni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lsx_vssrarni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_h_w' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vssrarni_w_d(v4i32 _1, v4i32 _2, int var) { ++ v4i32 res = __lsx_vssrarni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= 
__lsx_vssrarni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range 0, 63}} ++ res |= __lsx_vssrarni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_w_d' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vssrarni_d_q(v2i64 _1, v2i64 _2, int var) { ++ v2i64 res = __lsx_vssrarni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 127}} ++ res |= __lsx_vssrarni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range 0, 127}} ++ res |= __lsx_vssrarni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_d_q' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vssrarni_bu_h(v16u8 _1, v16i8 _2, int var) { ++ v16u8 res = __lsx_vssrarni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __lsx_vssrarni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __lsx_vssrarni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_bu_h' must be a constant integer}} ++ return res; ++} ++ ++v8u16 vssrarni_hu_w(v8u16 _1, v8i16 _2, int var) { ++ v8u16 res = __lsx_vssrarni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lsx_vssrarni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lsx_vssrarni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_hu_w' must be a constant integer}} ++ return res; ++} ++ ++v4u32 vssrarni_wu_d(v4u32 _1, v4i32 _2, int var) { ++ v4u32 res = __lsx_vssrarni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= __lsx_vssrarni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range 0, 63}} ++ res |= __lsx_vssrarni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_wu_d' must be a constant integer}} ++ return res; ++} ++ ++v2u64 vssrarni_du_q(v2u64 _1, v2i64 _2, int var) { ++ v2u64 res = __lsx_vssrarni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 127}} ++ res |= __lsx_vssrarni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range 0, 127}} ++ res |= __lsx_vssrarni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_du_q' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vpermi_w(v4i32 _1, v4i32 _2, int var) { ++ v4i32 res = __lsx_vpermi_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 255}} ++ res |= __lsx_vpermi_w(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range 0, 255}} ++ res |= __lsx_vpermi_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vpermi_w' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vld(void *_1, int var) { ++ v16i8 res = __lsx_vld(_1, -2049); // expected-error {{argument value -2049 is outside the valid range -2048, 2047}} ++ res |= __lsx_vld(_1, 2048); // expected-error {{argument value 2048 is outside the valid range -2048, 2047}} ++ res |= __lsx_vld(_1, var); // expected-error {{argument to '__builtin_lsx_vld' must be a constant integer}} ++ return res; ++} ++ ++void vst(v16i8 _1, void *_2, int var) { ++ __lsx_vst(_1, _2, -2049); // expected-error {{argument value -2049 is outside the valid range -2048, 2047}} ++ __lsx_vst(_1, _2, 2048); // 
expected-error {{argument value 2048 is outside the valid range -2048, 2047}} ++ __lsx_vst(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vst' must be a constant integer}} ++} ++ ++v2i64 vldi(int var) { ++ v2i64 res = __lsx_vldi(-4097); // expected-error {{argument value -4097 is outside the valid range -4096, 4095}} ++ res |= __lsx_vldi(4096); // expected-error {{argument value 4096 is outside the valid range -4096, 4095}} ++ res |= __lsx_vldi(var); // expected-error {{argument to '__builtin_lsx_vldi' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vrepli_b(int var) { ++ v16i8 res = __lsx_vrepli_b(-513); // expected-error {{argument value -513 is outside the valid range -512, 511}} ++ res |= __lsx_vrepli_b(512); // expected-error {{argument value 512 is outside the valid range -512, 511}} ++ res |= __lsx_vrepli_b(var); // expected-error {{argument to '__builtin_lsx_vrepli_b' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vrepli_d(int var) { ++ v2i64 res = __lsx_vrepli_d(-513); // expected-error {{argument value -513 is outside the valid range -512, 511}} ++ res |= __lsx_vrepli_d(512); // expected-error {{argument value 512 is outside the valid range -512, 511}} ++ res |= __lsx_vrepli_d(var); // expected-error {{argument to '__builtin_lsx_vrepli_d' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vrepli_h(int var) { ++ v8i16 res = __lsx_vrepli_h(-513); // expected-error {{argument value -513 is outside the valid range -512, 511}} ++ res |= __lsx_vrepli_h(512); // expected-error {{argument value 512 is outside the valid range -512, 511}} ++ res |= __lsx_vrepli_h(var); // expected-error {{argument to '__builtin_lsx_vrepli_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vrepli_w(int var) { ++ v4i32 res = __lsx_vrepli_w(-513); // expected-error {{argument value -513 is outside the valid range -512, 511}} ++ res |= __lsx_vrepli_w(512); // expected-error {{argument value 512 is outside the valid range -512, 511}} ++ res |= __lsx_vrepli_w(var); // expected-error {{argument to '__builtin_lsx_vrepli_w' must be a constant integer}} ++ return res; ++} +diff --git a/clang/test/CodeGen/LoongArch/lsx/builtin-alias.c b/clang/test/CodeGen/LoongArch/lsx/builtin-alias.c +new file mode 100644 +index 000000000000..331e29fb7d17 +--- /dev/null ++++ b/clang/test/CodeGen/LoongArch/lsx/builtin-alias.c +@@ -0,0 +1,4451 @@ ++// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py ++// RUN: %clang_cc1 -triple loongarch64 -target-feature +lsx -O2 -emit-llvm %s -o - | FileCheck %s ++ ++#include <lsxintrin.h> ++ ++// CHECK-LABEL: @vsll_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsll.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vsll_b(v16i8 _1, v16i8 _2) { return __lsx_vsll_b(_1, _2); } ++// CHECK-LABEL: @vsll_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsll.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vsll_h(v8i16 _1, v8i16 _2) { return __lsx_vsll_h(_1, _2); } ++// CHECK-LABEL: @vsll_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsll.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vsll_w(v4i32 _1, v4i32 _2) { return __lsx_vsll_w(_1, _2); } ++// CHECK-LABEL: @vsll_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsll.d(<2 x 
i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vsll_d(v2i64 _1, v2i64 _2) { return __lsx_vsll_d(_1, _2); } ++// CHECK-LABEL: @vslli_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vslli.b(<16 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vslli_b(v16i8 _1) { return __lsx_vslli_b(_1, 1); } ++// CHECK-LABEL: @vslli_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vslli.h(<8 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vslli_h(v8i16 _1) { return __lsx_vslli_h(_1, 1); } ++// CHECK-LABEL: @vslli_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vslli.w(<4 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vslli_w(v4i32 _1) { return __lsx_vslli_w(_1, 1); } ++// CHECK-LABEL: @vslli_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vslli.d(<2 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vslli_d(v2i64 _1) { return __lsx_vslli_d(_1, 1); } ++// CHECK-LABEL: @vsra_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsra.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vsra_b(v16i8 _1, v16i8 _2) { return __lsx_vsra_b(_1, _2); } ++// CHECK-LABEL: @vsra_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsra.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vsra_h(v8i16 _1, v8i16 _2) { return __lsx_vsra_h(_1, _2); } ++// CHECK-LABEL: @vsra_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsra.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vsra_w(v4i32 _1, v4i32 _2) { return __lsx_vsra_w(_1, _2); } ++// CHECK-LABEL: @vsra_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsra.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vsra_d(v2i64 _1, v2i64 _2) { return __lsx_vsra_d(_1, _2); } ++// CHECK-LABEL: @vsrai_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsrai.b(<16 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vsrai_b(v16i8 _1) { return __lsx_vsrai_b(_1, 1); } ++// CHECK-LABEL: @vsrai_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsrai.h(<8 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vsrai_h(v8i16 _1) { return __lsx_vsrai_h(_1, 1); } ++// CHECK-LABEL: @vsrai_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsrai.w(<4 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vsrai_w(v4i32 _1) { return __lsx_vsrai_w(_1, 1); } ++// CHECK-LABEL: @vsrai_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsrai.d(<2 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vsrai_d(v2i64 _1) { return __lsx_vsrai_d(_1, 1); } ++// CHECK-LABEL: @vsrar_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsrar.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vsrar_b(v16i8 _1, v16i8 _2) { return __lsx_vsrar_b(_1, _2); } ++// CHECK-LABEL: @vsrar_h( 
++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsrar.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vsrar_h(v8i16 _1, v8i16 _2) { return __lsx_vsrar_h(_1, _2); } ++// CHECK-LABEL: @vsrar_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsrar.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vsrar_w(v4i32 _1, v4i32 _2) { return __lsx_vsrar_w(_1, _2); } ++// CHECK-LABEL: @vsrar_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsrar.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vsrar_d(v2i64 _1, v2i64 _2) { return __lsx_vsrar_d(_1, _2); } ++// CHECK-LABEL: @vsrari_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsrari.b(<16 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vsrari_b(v16i8 _1) { return __lsx_vsrari_b(_1, 1); } ++// CHECK-LABEL: @vsrari_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsrari.h(<8 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vsrari_h(v8i16 _1) { return __lsx_vsrari_h(_1, 1); } ++// CHECK-LABEL: @vsrari_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsrari.w(<4 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vsrari_w(v4i32 _1) { return __lsx_vsrari_w(_1, 1); } ++// CHECK-LABEL: @vsrari_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsrari.d(<2 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vsrari_d(v2i64 _1) { return __lsx_vsrari_d(_1, 1); } ++// CHECK-LABEL: @vsrl_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsrl.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vsrl_b(v16i8 _1, v16i8 _2) { return __lsx_vsrl_b(_1, _2); } ++// CHECK-LABEL: @vsrl_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsrl.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vsrl_h(v8i16 _1, v8i16 _2) { return __lsx_vsrl_h(_1, _2); } ++// CHECK-LABEL: @vsrl_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsrl.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vsrl_w(v4i32 _1, v4i32 _2) { return __lsx_vsrl_w(_1, _2); } ++// CHECK-LABEL: @vsrl_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsrl.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vsrl_d(v2i64 _1, v2i64 _2) { return __lsx_vsrl_d(_1, _2); } ++// CHECK-LABEL: @vsrli_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsrli.b(<16 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vsrli_b(v16i8 _1) { return __lsx_vsrli_b(_1, 1); } ++// CHECK-LABEL: @vsrli_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsrli.h(<8 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vsrli_h(v8i16 _1) { return __lsx_vsrli_h(_1, 1); } ++// CHECK-LABEL: @vsrli_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsrli.w(<4 x i32> 
_1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vsrli_w(v4i32 _1) { return __lsx_vsrli_w(_1, 1); } ++// CHECK-LABEL: @vsrli_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsrli.d(<2 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vsrli_d(v2i64 _1) { return __lsx_vsrli_d(_1, 1); } ++// CHECK-LABEL: @vsrlr_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsrlr.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vsrlr_b(v16i8 _1, v16i8 _2) { return __lsx_vsrlr_b(_1, _2); } ++// CHECK-LABEL: @vsrlr_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsrlr.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vsrlr_h(v8i16 _1, v8i16 _2) { return __lsx_vsrlr_h(_1, _2); } ++// CHECK-LABEL: @vsrlr_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsrlr.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vsrlr_w(v4i32 _1, v4i32 _2) { return __lsx_vsrlr_w(_1, _2); } ++// CHECK-LABEL: @vsrlr_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsrlr.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vsrlr_d(v2i64 _1, v2i64 _2) { return __lsx_vsrlr_d(_1, _2); } ++// CHECK-LABEL: @vsrlri_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsrlri.b(<16 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vsrlri_b(v16i8 _1) { return __lsx_vsrlri_b(_1, 1); } ++// CHECK-LABEL: @vsrlri_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsrlri.h(<8 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vsrlri_h(v8i16 _1) { return __lsx_vsrlri_h(_1, 1); } ++// CHECK-LABEL: @vsrlri_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsrlri.w(<4 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vsrlri_w(v4i32 _1) { return __lsx_vsrlri_w(_1, 1); } ++// CHECK-LABEL: @vsrlri_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsrlri.d(<2 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vsrlri_d(v2i64 _1) { return __lsx_vsrlri_d(_1, 1); } ++// CHECK-LABEL: @vbitclr_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vbitclr.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16u8 vbitclr_b(v16u8 _1, v16u8 _2) { return __lsx_vbitclr_b(_1, _2); } ++// CHECK-LABEL: @vbitclr_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vbitclr.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8u16 vbitclr_h(v8u16 _1, v8u16 _2) { return __lsx_vbitclr_h(_1, _2); } ++// CHECK-LABEL: @vbitclr_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vbitclr.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4u32 vbitclr_w(v4u32 _1, v4u32 _2) { return __lsx_vbitclr_w(_1, _2); } ++// CHECK-LABEL: @vbitclr_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vbitclr.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// 
++v2u64 vbitclr_d(v2u64 _1, v2u64 _2) { return __lsx_vbitclr_d(_1, _2); } ++// CHECK-LABEL: @vbitclri_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vbitclri.b(<16 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16u8 vbitclri_b(v16u8 _1) { return __lsx_vbitclri_b(_1, 1); } ++// CHECK-LABEL: @vbitclri_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vbitclri.h(<8 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8u16 vbitclri_h(v8u16 _1) { return __lsx_vbitclri_h(_1, 1); } ++// CHECK-LABEL: @vbitclri_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vbitclri.w(<4 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4u32 vbitclri_w(v4u32 _1) { return __lsx_vbitclri_w(_1, 1); } ++// CHECK-LABEL: @vbitclri_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vbitclri.d(<2 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2u64 vbitclri_d(v2u64 _1) { return __lsx_vbitclri_d(_1, 1); } ++// CHECK-LABEL: @vbitset_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vbitset.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16u8 vbitset_b(v16u8 _1, v16u8 _2) { return __lsx_vbitset_b(_1, _2); } ++// CHECK-LABEL: @vbitset_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vbitset.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8u16 vbitset_h(v8u16 _1, v8u16 _2) { return __lsx_vbitset_h(_1, _2); } ++// CHECK-LABEL: @vbitset_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vbitset.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4u32 vbitset_w(v4u32 _1, v4u32 _2) { return __lsx_vbitset_w(_1, _2); } ++// CHECK-LABEL: @vbitset_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vbitset.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2u64 vbitset_d(v2u64 _1, v2u64 _2) { return __lsx_vbitset_d(_1, _2); } ++// CHECK-LABEL: @vbitseti_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vbitseti.b(<16 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16u8 vbitseti_b(v16u8 _1) { return __lsx_vbitseti_b(_1, 1); } ++// CHECK-LABEL: @vbitseti_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vbitseti.h(<8 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8u16 vbitseti_h(v8u16 _1) { return __lsx_vbitseti_h(_1, 1); } ++// CHECK-LABEL: @vbitseti_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vbitseti.w(<4 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4u32 vbitseti_w(v4u32 _1) { return __lsx_vbitseti_w(_1, 1); } ++// CHECK-LABEL: @vbitseti_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vbitseti.d(<2 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2u64 vbitseti_d(v2u64 _1) { return __lsx_vbitseti_d(_1, 1); } ++// CHECK-LABEL: @vbitrev_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vbitrev.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16u8 
vbitrev_b(v16u8 _1, v16u8 _2) { return __lsx_vbitrev_b(_1, _2); } ++// CHECK-LABEL: @vbitrev_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vbitrev.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8u16 vbitrev_h(v8u16 _1, v8u16 _2) { return __lsx_vbitrev_h(_1, _2); } ++// CHECK-LABEL: @vbitrev_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vbitrev.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4u32 vbitrev_w(v4u32 _1, v4u32 _2) { return __lsx_vbitrev_w(_1, _2); } ++// CHECK-LABEL: @vbitrev_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vbitrev.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2u64 vbitrev_d(v2u64 _1, v2u64 _2) { return __lsx_vbitrev_d(_1, _2); } ++// CHECK-LABEL: @vbitrevi_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vbitrevi.b(<16 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16u8 vbitrevi_b(v16u8 _1) { return __lsx_vbitrevi_b(_1, 1); } ++// CHECK-LABEL: @vbitrevi_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vbitrevi.h(<8 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8u16 vbitrevi_h(v8u16 _1) { return __lsx_vbitrevi_h(_1, 1); } ++// CHECK-LABEL: @vbitrevi_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vbitrevi.w(<4 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4u32 vbitrevi_w(v4u32 _1) { return __lsx_vbitrevi_w(_1, 1); } ++// CHECK-LABEL: @vbitrevi_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vbitrevi.d(<2 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2u64 vbitrevi_d(v2u64 _1) { return __lsx_vbitrevi_d(_1, 1); } ++// CHECK-LABEL: @vadd_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vadd.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vadd_b(v16i8 _1, v16i8 _2) { return __lsx_vadd_b(_1, _2); } ++// CHECK-LABEL: @vadd_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vadd.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vadd_h(v8i16 _1, v8i16 _2) { return __lsx_vadd_h(_1, _2); } ++// CHECK-LABEL: @vadd_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vadd.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vadd_w(v4i32 _1, v4i32 _2) { return __lsx_vadd_w(_1, _2); } ++// CHECK-LABEL: @vadd_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vadd.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vadd_d(v2i64 _1, v2i64 _2) { return __lsx_vadd_d(_1, _2); } ++// CHECK-LABEL: @vaddi_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vaddi.bu(<16 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vaddi_bu(v16i8 _1) { return __lsx_vaddi_bu(_1, 1); } ++// CHECK-LABEL: @vaddi_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vaddi.hu(<8 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vaddi_hu(v8i16 _1) { return 
__lsx_vaddi_hu(_1, 1); } ++// CHECK-LABEL: @vaddi_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vaddi.wu(<4 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vaddi_wu(v4i32 _1) { return __lsx_vaddi_wu(_1, 1); } ++// CHECK-LABEL: @vaddi_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vaddi.du(<2 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vaddi_du(v2i64 _1) { return __lsx_vaddi_du(_1, 1); } ++// CHECK-LABEL: @vsub_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsub.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vsub_b(v16i8 _1, v16i8 _2) { return __lsx_vsub_b(_1, _2); } ++// CHECK-LABEL: @vsub_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsub.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vsub_h(v8i16 _1, v8i16 _2) { return __lsx_vsub_h(_1, _2); } ++// CHECK-LABEL: @vsub_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsub.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vsub_w(v4i32 _1, v4i32 _2) { return __lsx_vsub_w(_1, _2); } ++// CHECK-LABEL: @vsub_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsub.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vsub_d(v2i64 _1, v2i64 _2) { return __lsx_vsub_d(_1, _2); } ++// CHECK-LABEL: @vsubi_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsubi.bu(<16 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vsubi_bu(v16i8 _1) { return __lsx_vsubi_bu(_1, 1); } ++// CHECK-LABEL: @vsubi_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsubi.hu(<8 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vsubi_hu(v8i16 _1) { return __lsx_vsubi_hu(_1, 1); } ++// CHECK-LABEL: @vsubi_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsubi.wu(<4 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vsubi_wu(v4i32 _1) { return __lsx_vsubi_wu(_1, 1); } ++// CHECK-LABEL: @vsubi_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsubi.du(<2 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vsubi_du(v2i64 _1) { return __lsx_vsubi_du(_1, 1); } ++// CHECK-LABEL: @vmax_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmax.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vmax_b(v16i8 _1, v16i8 _2) { return __lsx_vmax_b(_1, _2); } ++// CHECK-LABEL: @vmax_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmax.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vmax_h(v8i16 _1, v8i16 _2) { return __lsx_vmax_h(_1, _2); } ++// CHECK-LABEL: @vmax_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmax.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vmax_w(v4i32 _1, v4i32 _2) { return __lsx_vmax_w(_1, _2); } ++// CHECK-LABEL: @vmax_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call 
<2 x i64> @llvm.loongarch.lsx.vmax.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vmax_d(v2i64 _1, v2i64 _2) { return __lsx_vmax_d(_1, _2); } ++// CHECK-LABEL: @vmaxi_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmaxi.b(<16 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vmaxi_b(v16i8 _1) { return __lsx_vmaxi_b(_1, 1); } ++// CHECK-LABEL: @vmaxi_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmaxi.h(<8 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vmaxi_h(v8i16 _1) { return __lsx_vmaxi_h(_1, 1); } ++// CHECK-LABEL: @vmaxi_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmaxi.w(<4 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vmaxi_w(v4i32 _1) { return __lsx_vmaxi_w(_1, 1); } ++// CHECK-LABEL: @vmaxi_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmaxi.d(<2 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vmaxi_d(v2i64 _1) { return __lsx_vmaxi_d(_1, 1); } ++// CHECK-LABEL: @vmax_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmax.bu(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16u8 vmax_bu(v16u8 _1, v16u8 _2) { return __lsx_vmax_bu(_1, _2); } ++// CHECK-LABEL: @vmax_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmax.hu(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8u16 vmax_hu(v8u16 _1, v8u16 _2) { return __lsx_vmax_hu(_1, _2); } ++// CHECK-LABEL: @vmax_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmax.wu(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4u32 vmax_wu(v4u32 _1, v4u32 _2) { return __lsx_vmax_wu(_1, _2); } ++// CHECK-LABEL: @vmax_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmax.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2u64 vmax_du(v2u64 _1, v2u64 _2) { return __lsx_vmax_du(_1, _2); } ++// CHECK-LABEL: @vmaxi_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmaxi.bu(<16 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16u8 vmaxi_bu(v16u8 _1) { return __lsx_vmaxi_bu(_1, 1); } ++// CHECK-LABEL: @vmaxi_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmaxi.hu(<8 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8u16 vmaxi_hu(v8u16 _1) { return __lsx_vmaxi_hu(_1, 1); } ++// CHECK-LABEL: @vmaxi_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmaxi.wu(<4 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4u32 vmaxi_wu(v4u32 _1) { return __lsx_vmaxi_wu(_1, 1); } ++// CHECK-LABEL: @vmaxi_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmaxi.du(<2 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2u64 vmaxi_du(v2u64 _1) { return __lsx_vmaxi_du(_1, 1); } ++// CHECK-LABEL: @vmin_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmin.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vmin_b(v16i8 _1, 
v16i8 _2) { return __lsx_vmin_b(_1, _2); } ++// CHECK-LABEL: @vmin_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmin.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vmin_h(v8i16 _1, v8i16 _2) { return __lsx_vmin_h(_1, _2); } ++// CHECK-LABEL: @vmin_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmin.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vmin_w(v4i32 _1, v4i32 _2) { return __lsx_vmin_w(_1, _2); } ++// CHECK-LABEL: @vmin_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmin.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vmin_d(v2i64 _1, v2i64 _2) { return __lsx_vmin_d(_1, _2); } ++// CHECK-LABEL: @vmini_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmini.b(<16 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vmini_b(v16i8 _1) { return __lsx_vmini_b(_1, 1); } ++// CHECK-LABEL: @vmini_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmini.h(<8 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vmini_h(v8i16 _1) { return __lsx_vmini_h(_1, 1); } ++// CHECK-LABEL: @vmini_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmini.w(<4 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vmini_w(v4i32 _1) { return __lsx_vmini_w(_1, 1); } ++// CHECK-LABEL: @vmini_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmini.d(<2 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vmini_d(v2i64 _1) { return __lsx_vmini_d(_1, 1); } ++// CHECK-LABEL: @vmin_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmin.bu(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16u8 vmin_bu(v16u8 _1, v16u8 _2) { return __lsx_vmin_bu(_1, _2); } ++// CHECK-LABEL: @vmin_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmin.hu(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8u16 vmin_hu(v8u16 _1, v8u16 _2) { return __lsx_vmin_hu(_1, _2); } ++// CHECK-LABEL: @vmin_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmin.wu(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4u32 vmin_wu(v4u32 _1, v4u32 _2) { return __lsx_vmin_wu(_1, _2); } ++// CHECK-LABEL: @vmin_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmin.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2u64 vmin_du(v2u64 _1, v2u64 _2) { return __lsx_vmin_du(_1, _2); } ++// CHECK-LABEL: @vmini_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmini.bu(<16 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16u8 vmini_bu(v16u8 _1) { return __lsx_vmini_bu(_1, 1); } ++// CHECK-LABEL: @vmini_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmini.hu(<8 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8u16 vmini_hu(v8u16 _1) { return __lsx_vmini_hu(_1, 1); } ++// CHECK-LABEL: @vmini_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: 
TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmini.wu(<4 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4u32 vmini_wu(v4u32 _1) { return __lsx_vmini_wu(_1, 1); } ++// CHECK-LABEL: @vmini_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmini.du(<2 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2u64 vmini_du(v2u64 _1) { return __lsx_vmini_du(_1, 1); } ++// CHECK-LABEL: @vseq_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vseq.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vseq_b(v16i8 _1, v16i8 _2) { return __lsx_vseq_b(_1, _2); } ++// CHECK-LABEL: @vseq_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vseq.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vseq_h(v8i16 _1, v8i16 _2) { return __lsx_vseq_h(_1, _2); } ++// CHECK-LABEL: @vseq_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vseq.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vseq_w(v4i32 _1, v4i32 _2) { return __lsx_vseq_w(_1, _2); } ++// CHECK-LABEL: @vseq_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vseq.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vseq_d(v2i64 _1, v2i64 _2) { return __lsx_vseq_d(_1, _2); } ++// CHECK-LABEL: @vseqi_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vseqi.b(<16 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vseqi_b(v16i8 _1) { return __lsx_vseqi_b(_1, 1); } ++// CHECK-LABEL: @vseqi_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vseqi.h(<8 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vseqi_h(v8i16 _1) { return __lsx_vseqi_h(_1, 1); } ++// CHECK-LABEL: @vseqi_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vseqi.w(<4 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vseqi_w(v4i32 _1) { return __lsx_vseqi_w(_1, 1); } ++// CHECK-LABEL: @vseqi_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vseqi.d(<2 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vseqi_d(v2i64 _1) { return __lsx_vseqi_d(_1, 1); } ++// CHECK-LABEL: @vslti_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vslti.b(<16 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vslti_b(v16i8 _1) { return __lsx_vslti_b(_1, 1); } ++// CHECK-LABEL: @vslt_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vslt.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vslt_b(v16i8 _1, v16i8 _2) { return __lsx_vslt_b(_1, _2); } ++// CHECK-LABEL: @vslt_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vslt.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vslt_h(v8i16 _1, v8i16 _2) { return __lsx_vslt_h(_1, _2); } ++// CHECK-LABEL: @vslt_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vslt.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 
vslt_w(v4i32 _1, v4i32 _2) { return __lsx_vslt_w(_1, _2); } ++// CHECK-LABEL: @vslt_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vslt.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vslt_d(v2i64 _1, v2i64 _2) { return __lsx_vslt_d(_1, _2); } ++// CHECK-LABEL: @vslti_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vslti.h(<8 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vslti_h(v8i16 _1) { return __lsx_vslti_h(_1, 1); } ++// CHECK-LABEL: @vslti_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vslti.w(<4 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vslti_w(v4i32 _1) { return __lsx_vslti_w(_1, 1); } ++// CHECK-LABEL: @vslti_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vslti.d(<2 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vslti_d(v2i64 _1) { return __lsx_vslti_d(_1, 1); } ++// CHECK-LABEL: @vslt_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vslt.bu(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vslt_bu(v16u8 _1, v16u8 _2) { return __lsx_vslt_bu(_1, _2); } ++// CHECK-LABEL: @vslt_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vslt.hu(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vslt_hu(v8u16 _1, v8u16 _2) { return __lsx_vslt_hu(_1, _2); } ++// CHECK-LABEL: @vslt_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vslt.wu(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vslt_wu(v4u32 _1, v4u32 _2) { return __lsx_vslt_wu(_1, _2); } ++// CHECK-LABEL: @vslt_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vslt.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vslt_du(v2u64 _1, v2u64 _2) { return __lsx_vslt_du(_1, _2); } ++// CHECK-LABEL: @vslti_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vslti.bu(<16 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vslti_bu(v16u8 _1) { return __lsx_vslti_bu(_1, 1); } ++// CHECK-LABEL: @vslti_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vslti.hu(<8 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vslti_hu(v8u16 _1) { return __lsx_vslti_hu(_1, 1); } ++// CHECK-LABEL: @vslti_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vslti.wu(<4 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vslti_wu(v4u32 _1) { return __lsx_vslti_wu(_1, 1); } ++// CHECK-LABEL: @vslti_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vslti.du(<2 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vslti_du(v2u64 _1) { return __lsx_vslti_du(_1, 1); } ++// CHECK-LABEL: @vsle_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsle.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vsle_b(v16i8 _1, v16i8 _2) { return __lsx_vsle_b(_1, _2); } ++// CHECK-LABEL: @vsle_h( ++// CHECK-NEXT: entry: ++// 
CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsle.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vsle_h(v8i16 _1, v8i16 _2) { return __lsx_vsle_h(_1, _2); } ++// CHECK-LABEL: @vsle_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsle.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vsle_w(v4i32 _1, v4i32 _2) { return __lsx_vsle_w(_1, _2); } ++// CHECK-LABEL: @vsle_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsle.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vsle_d(v2i64 _1, v2i64 _2) { return __lsx_vsle_d(_1, _2); } ++// CHECK-LABEL: @vslei_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vslei.b(<16 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vslei_b(v16i8 _1) { return __lsx_vslei_b(_1, 1); } ++// CHECK-LABEL: @vslei_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vslei.h(<8 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vslei_h(v8i16 _1) { return __lsx_vslei_h(_1, 1); } ++// CHECK-LABEL: @vslei_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vslei.w(<4 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vslei_w(v4i32 _1) { return __lsx_vslei_w(_1, 1); } ++// CHECK-LABEL: @vslei_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vslei.d(<2 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vslei_d(v2i64 _1) { return __lsx_vslei_d(_1, 1); } ++// CHECK-LABEL: @vsle_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsle.bu(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vsle_bu(v16u8 _1, v16u8 _2) { return __lsx_vsle_bu(_1, _2); } ++// CHECK-LABEL: @vsle_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsle.hu(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vsle_hu(v8u16 _1, v8u16 _2) { return __lsx_vsle_hu(_1, _2); } ++// CHECK-LABEL: @vsle_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsle.wu(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vsle_wu(v4u32 _1, v4u32 _2) { return __lsx_vsle_wu(_1, _2); } ++// CHECK-LABEL: @vsle_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsle.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vsle_du(v2u64 _1, v2u64 _2) { return __lsx_vsle_du(_1, _2); } ++// CHECK-LABEL: @vslei_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vslei.bu(<16 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vslei_bu(v16u8 _1) { return __lsx_vslei_bu(_1, 1); } ++// CHECK-LABEL: @vslei_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vslei.hu(<8 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vslei_hu(v8u16 _1) { return __lsx_vslei_hu(_1, 1); } ++// CHECK-LABEL: @vslei_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vslei.wu(<4 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: 
ret <4 x i32> TMP0 ++// ++v4i32 vslei_wu(v4u32 _1) { return __lsx_vslei_wu(_1, 1); } ++// CHECK-LABEL: @vslei_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vslei.du(<2 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vslei_du(v2u64 _1) { return __lsx_vslei_du(_1, 1); } ++// CHECK-LABEL: @vsat_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsat.b(<16 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vsat_b(v16i8 _1) { return __lsx_vsat_b(_1, 1); } ++// CHECK-LABEL: @vsat_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsat.h(<8 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vsat_h(v8i16 _1) { return __lsx_vsat_h(_1, 1); } ++// CHECK-LABEL: @vsat_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsat.w(<4 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vsat_w(v4i32 _1) { return __lsx_vsat_w(_1, 1); } ++// CHECK-LABEL: @vsat_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsat.d(<2 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vsat_d(v2i64 _1) { return __lsx_vsat_d(_1, 1); } ++// CHECK-LABEL: @vsat_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsat.bu(<16 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16u8 vsat_bu(v16u8 _1) { return __lsx_vsat_bu(_1, 1); } ++// CHECK-LABEL: @vsat_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsat.hu(<8 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8u16 vsat_hu(v8u16 _1) { return __lsx_vsat_hu(_1, 1); } ++// CHECK-LABEL: @vsat_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsat.wu(<4 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4u32 vsat_wu(v4u32 _1) { return __lsx_vsat_wu(_1, 1); } ++// CHECK-LABEL: @vsat_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsat.du(<2 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2u64 vsat_du(v2u64 _1) { return __lsx_vsat_du(_1, 1); } ++// CHECK-LABEL: @vadda_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vadda.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vadda_b(v16i8 _1, v16i8 _2) { return __lsx_vadda_b(_1, _2); } ++// CHECK-LABEL: @vadda_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vadda.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vadda_h(v8i16 _1, v8i16 _2) { return __lsx_vadda_h(_1, _2); } ++// CHECK-LABEL: @vadda_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vadda.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vadda_w(v4i32 _1, v4i32 _2) { return __lsx_vadda_w(_1, _2); } ++// CHECK-LABEL: @vadda_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vadda.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vadda_d(v2i64 _1, v2i64 _2) { return __lsx_vadda_d(_1, _2); } ++// CHECK-LABEL: @vsadd_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> 
@llvm.loongarch.lsx.vsadd.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vsadd_b(v16i8 _1, v16i8 _2) { return __lsx_vsadd_b(_1, _2); } ++// CHECK-LABEL: @vsadd_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsadd.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vsadd_h(v8i16 _1, v8i16 _2) { return __lsx_vsadd_h(_1, _2); } ++// CHECK-LABEL: @vsadd_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsadd.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vsadd_w(v4i32 _1, v4i32 _2) { return __lsx_vsadd_w(_1, _2); } ++// CHECK-LABEL: @vsadd_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsadd.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vsadd_d(v2i64 _1, v2i64 _2) { return __lsx_vsadd_d(_1, _2); } ++// CHECK-LABEL: @vsadd_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsadd.bu(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16u8 vsadd_bu(v16u8 _1, v16u8 _2) { return __lsx_vsadd_bu(_1, _2); } ++// CHECK-LABEL: @vsadd_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsadd.hu(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8u16 vsadd_hu(v8u16 _1, v8u16 _2) { return __lsx_vsadd_hu(_1, _2); } ++// CHECK-LABEL: @vsadd_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsadd.wu(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4u32 vsadd_wu(v4u32 _1, v4u32 _2) { return __lsx_vsadd_wu(_1, _2); } ++// CHECK-LABEL: @vsadd_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsadd.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2u64 vsadd_du(v2u64 _1, v2u64 _2) { return __lsx_vsadd_du(_1, _2); } ++// CHECK-LABEL: @vavg_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vavg.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vavg_b(v16i8 _1, v16i8 _2) { return __lsx_vavg_b(_1, _2); } ++// CHECK-LABEL: @vavg_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vavg.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vavg_h(v8i16 _1, v8i16 _2) { return __lsx_vavg_h(_1, _2); } ++// CHECK-LABEL: @vavg_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vavg.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vavg_w(v4i32 _1, v4i32 _2) { return __lsx_vavg_w(_1, _2); } ++// CHECK-LABEL: @vavg_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vavg.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vavg_d(v2i64 _1, v2i64 _2) { return __lsx_vavg_d(_1, _2); } ++// CHECK-LABEL: @vavg_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vavg.bu(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16u8 vavg_bu(v16u8 _1, v16u8 _2) { return __lsx_vavg_bu(_1, _2); } ++// CHECK-LABEL: @vavg_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = 
tail call <8 x i16> @llvm.loongarch.lsx.vavg.hu(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8u16 vavg_hu(v8u16 _1, v8u16 _2) { return __lsx_vavg_hu(_1, _2); } ++// CHECK-LABEL: @vavg_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vavg.wu(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4u32 vavg_wu(v4u32 _1, v4u32 _2) { return __lsx_vavg_wu(_1, _2); } ++// CHECK-LABEL: @vavg_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vavg.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2u64 vavg_du(v2u64 _1, v2u64 _2) { return __lsx_vavg_du(_1, _2); } ++// CHECK-LABEL: @vavgr_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vavgr.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vavgr_b(v16i8 _1, v16i8 _2) { return __lsx_vavgr_b(_1, _2); } ++// CHECK-LABEL: @vavgr_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vavgr.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vavgr_h(v8i16 _1, v8i16 _2) { return __lsx_vavgr_h(_1, _2); } ++// CHECK-LABEL: @vavgr_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vavgr.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vavgr_w(v4i32 _1, v4i32 _2) { return __lsx_vavgr_w(_1, _2); } ++// CHECK-LABEL: @vavgr_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vavgr.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vavgr_d(v2i64 _1, v2i64 _2) { return __lsx_vavgr_d(_1, _2); } ++// CHECK-LABEL: @vavgr_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vavgr.bu(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16u8 vavgr_bu(v16u8 _1, v16u8 _2) { return __lsx_vavgr_bu(_1, _2); } ++// CHECK-LABEL: @vavgr_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vavgr.hu(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8u16 vavgr_hu(v8u16 _1, v8u16 _2) { return __lsx_vavgr_hu(_1, _2); } ++// CHECK-LABEL: @vavgr_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vavgr.wu(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4u32 vavgr_wu(v4u32 _1, v4u32 _2) { return __lsx_vavgr_wu(_1, _2); } ++// CHECK-LABEL: @vavgr_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vavgr.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2u64 vavgr_du(v2u64 _1, v2u64 _2) { return __lsx_vavgr_du(_1, _2); } ++// CHECK-LABEL: @vssub_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssub.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vssub_b(v16i8 _1, v16i8 _2) { return __lsx_vssub_b(_1, _2); } ++// CHECK-LABEL: @vssub_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssub.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vssub_h(v8i16 _1, v8i16 _2) { return __lsx_vssub_h(_1, _2); } ++// CHECK-LABEL: @vssub_w( ++// 
CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssub.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vssub_w(v4i32 _1, v4i32 _2) { return __lsx_vssub_w(_1, _2); } ++// CHECK-LABEL: @vssub_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vssub.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vssub_d(v2i64 _1, v2i64 _2) { return __lsx_vssub_d(_1, _2); } ++// CHECK-LABEL: @vssub_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssub.bu(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16u8 vssub_bu(v16u8 _1, v16u8 _2) { return __lsx_vssub_bu(_1, _2); } ++// CHECK-LABEL: @vssub_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssub.hu(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8u16 vssub_hu(v8u16 _1, v8u16 _2) { return __lsx_vssub_hu(_1, _2); } ++// CHECK-LABEL: @vssub_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssub.wu(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4u32 vssub_wu(v4u32 _1, v4u32 _2) { return __lsx_vssub_wu(_1, _2); } ++// CHECK-LABEL: @vssub_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vssub.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2u64 vssub_du(v2u64 _1, v2u64 _2) { return __lsx_vssub_du(_1, _2); } ++// CHECK-LABEL: @vabsd_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vabsd.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vabsd_b(v16i8 _1, v16i8 _2) { return __lsx_vabsd_b(_1, _2); } ++// CHECK-LABEL: @vabsd_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vabsd.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vabsd_h(v8i16 _1, v8i16 _2) { return __lsx_vabsd_h(_1, _2); } ++// CHECK-LABEL: @vabsd_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vabsd.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vabsd_w(v4i32 _1, v4i32 _2) { return __lsx_vabsd_w(_1, _2); } ++// CHECK-LABEL: @vabsd_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vabsd.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vabsd_d(v2i64 _1, v2i64 _2) { return __lsx_vabsd_d(_1, _2); } ++// CHECK-LABEL: @vabsd_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vabsd.bu(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16u8 vabsd_bu(v16u8 _1, v16u8 _2) { return __lsx_vabsd_bu(_1, _2); } ++// CHECK-LABEL: @vabsd_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vabsd.hu(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8u16 vabsd_hu(v8u16 _1, v8u16 _2) { return __lsx_vabsd_hu(_1, _2); } ++// CHECK-LABEL: @vabsd_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vabsd.wu(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4u32 vabsd_wu(v4u32 _1, v4u32 _2) { return 
__lsx_vabsd_wu(_1, _2); } ++// CHECK-LABEL: @vabsd_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vabsd.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2u64 vabsd_du(v2u64 _1, v2u64 _2) { return __lsx_vabsd_du(_1, _2); } ++// CHECK-LABEL: @vmul_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmul.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vmul_b(v16i8 _1, v16i8 _2) { return __lsx_vmul_b(_1, _2); } ++// CHECK-LABEL: @vmul_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmul.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vmul_h(v8i16 _1, v8i16 _2) { return __lsx_vmul_h(_1, _2); } ++// CHECK-LABEL: @vmul_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmul.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vmul_w(v4i32 _1, v4i32 _2) { return __lsx_vmul_w(_1, _2); } ++// CHECK-LABEL: @vmul_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmul.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vmul_d(v2i64 _1, v2i64 _2) { return __lsx_vmul_d(_1, _2); } ++// CHECK-LABEL: @vmadd_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmadd.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*, <16 x i8> _3:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vmadd_b(v16i8 _1, v16i8 _2, v16i8 _3) { ++ return __lsx_vmadd_b(_1, _2, _3); ++} ++// CHECK-LABEL: @vmadd_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmadd.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*, <8 x i16> _3:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vmadd_h(v8i16 _1, v8i16 _2, v8i16 _3) { ++ return __lsx_vmadd_h(_1, _2, _3); ++} ++// CHECK-LABEL: @vmadd_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmadd.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*, <4 x i32> _3:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vmadd_w(v4i32 _1, v4i32 _2, v4i32 _3) { ++ return __lsx_vmadd_w(_1, _2, _3); ++} ++// CHECK-LABEL: @vmadd_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmadd.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*, <2 x i64> _3:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vmadd_d(v2i64 _1, v2i64 _2, v2i64 _3) { ++ return __lsx_vmadd_d(_1, _2, _3); ++} ++// CHECK-LABEL: @vmsub_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmsub.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*, <16 x i8> _3:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vmsub_b(v16i8 _1, v16i8 _2, v16i8 _3) { ++ return __lsx_vmsub_b(_1, _2, _3); ++} ++// CHECK-LABEL: @vmsub_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmsub.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*, <8 x i16> _3:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vmsub_h(v8i16 _1, v8i16 _2, v8i16 _3) { ++ return __lsx_vmsub_h(_1, _2, _3); ++} ++// CHECK-LABEL: @vmsub_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmsub.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*, <4 x i32> _3:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vmsub_w(v4i32 _1, v4i32 _2, v4i32 _3) { ++ return 
__lsx_vmsub_w(_1, _2, _3); ++} ++// CHECK-LABEL: @vmsub_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmsub.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*, <2 x i64> _3:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vmsub_d(v2i64 _1, v2i64 _2, v2i64 _3) { ++ return __lsx_vmsub_d(_1, _2, _3); ++} ++// CHECK-LABEL: @vdiv_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vdiv.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vdiv_b(v16i8 _1, v16i8 _2) { return __lsx_vdiv_b(_1, _2); } ++// CHECK-LABEL: @vdiv_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vdiv.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vdiv_h(v8i16 _1, v8i16 _2) { return __lsx_vdiv_h(_1, _2); } ++// CHECK-LABEL: @vdiv_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vdiv.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vdiv_w(v4i32 _1, v4i32 _2) { return __lsx_vdiv_w(_1, _2); } ++// CHECK-LABEL: @vdiv_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vdiv.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vdiv_d(v2i64 _1, v2i64 _2) { return __lsx_vdiv_d(_1, _2); } ++// CHECK-LABEL: @vdiv_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vdiv.bu(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16u8 vdiv_bu(v16u8 _1, v16u8 _2) { return __lsx_vdiv_bu(_1, _2); } ++// CHECK-LABEL: @vdiv_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vdiv.hu(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8u16 vdiv_hu(v8u16 _1, v8u16 _2) { return __lsx_vdiv_hu(_1, _2); } ++// CHECK-LABEL: @vdiv_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vdiv.wu(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4u32 vdiv_wu(v4u32 _1, v4u32 _2) { return __lsx_vdiv_wu(_1, _2); } ++// CHECK-LABEL: @vdiv_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vdiv.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2u64 vdiv_du(v2u64 _1, v2u64 _2) { return __lsx_vdiv_du(_1, _2); } ++// CHECK-LABEL: @vhaddw_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vhaddw.h.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vhaddw_h_b(v16i8 _1, v16i8 _2) { return __lsx_vhaddw_h_b(_1, _2); } ++// CHECK-LABEL: @vhaddw_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vhaddw.w.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vhaddw_w_h(v8i16 _1, v8i16 _2) { return __lsx_vhaddw_w_h(_1, _2); } ++// CHECK-LABEL: @vhaddw_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.d.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vhaddw_d_w(v4i32 _1, v4i32 _2) { return __lsx_vhaddw_d_w(_1, _2); } ++// CHECK-LABEL: @vhaddw_hu_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vhaddw.hu.bu(<16 x i8> _1:%.*, <16 x i8> 
_2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8u16 vhaddw_hu_bu(v16u8 _1, v16u8 _2) { return __lsx_vhaddw_hu_bu(_1, _2); } ++// CHECK-LABEL: @vhaddw_wu_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vhaddw.wu.hu(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4u32 vhaddw_wu_hu(v8u16 _1, v8u16 _2) { return __lsx_vhaddw_wu_hu(_1, _2); } ++// CHECK-LABEL: @vhaddw_du_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.du.wu(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2u64 vhaddw_du_wu(v4u32 _1, v4u32 _2) { return __lsx_vhaddw_du_wu(_1, _2); } ++// CHECK-LABEL: @vhsubw_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vhsubw.h.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vhsubw_h_b(v16i8 _1, v16i8 _2) { return __lsx_vhsubw_h_b(_1, _2); } ++// CHECK-LABEL: @vhsubw_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vhsubw.w.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vhsubw_w_h(v8i16 _1, v8i16 _2) { return __lsx_vhsubw_w_h(_1, _2); } ++// CHECK-LABEL: @vhsubw_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.d.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vhsubw_d_w(v4i32 _1, v4i32 _2) { return __lsx_vhsubw_d_w(_1, _2); } ++// CHECK-LABEL: @vhsubw_hu_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vhsubw.hu.bu(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vhsubw_hu_bu(v16u8 _1, v16u8 _2) { return __lsx_vhsubw_hu_bu(_1, _2); } ++// CHECK-LABEL: @vhsubw_wu_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vhsubw.wu.hu(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vhsubw_wu_hu(v8u16 _1, v8u16 _2) { return __lsx_vhsubw_wu_hu(_1, _2); } ++// CHECK-LABEL: @vhsubw_du_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.du.wu(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vhsubw_du_wu(v4u32 _1, v4u32 _2) { return __lsx_vhsubw_du_wu(_1, _2); } ++// CHECK-LABEL: @vmod_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmod.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vmod_b(v16i8 _1, v16i8 _2) { return __lsx_vmod_b(_1, _2); } ++// CHECK-LABEL: @vmod_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmod.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vmod_h(v8i16 _1, v8i16 _2) { return __lsx_vmod_h(_1, _2); } ++// CHECK-LABEL: @vmod_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmod.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vmod_w(v4i32 _1, v4i32 _2) { return __lsx_vmod_w(_1, _2); } ++// CHECK-LABEL: @vmod_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmod.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vmod_d(v2i64 _1, v2i64 _2) { return __lsx_vmod_d(_1, _2); } ++// 
CHECK-LABEL: @vmod_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmod.bu(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16u8 vmod_bu(v16u8 _1, v16u8 _2) { return __lsx_vmod_bu(_1, _2); } ++// CHECK-LABEL: @vmod_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmod.hu(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8u16 vmod_hu(v8u16 _1, v8u16 _2) { return __lsx_vmod_hu(_1, _2); } ++// CHECK-LABEL: @vmod_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmod.wu(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4u32 vmod_wu(v4u32 _1, v4u32 _2) { return __lsx_vmod_wu(_1, _2); } ++// CHECK-LABEL: @vmod_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmod.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2u64 vmod_du(v2u64 _1, v2u64 _2) { return __lsx_vmod_du(_1, _2); } ++// CHECK-LABEL: @vreplve_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vreplve.b(<16 x i8> _1:%.*, i32 _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vreplve_b(v16i8 _1, int _2) { return __lsx_vreplve_b(_1, _2); } ++// CHECK-LABEL: @vreplve_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vreplve.h(<8 x i16> _1:%.*, i32 _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vreplve_h(v8i16 _1, int _2) { return __lsx_vreplve_h(_1, _2); } ++// CHECK-LABEL: @vreplve_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vreplve.w(<4 x i32> _1:%.*, i32 _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vreplve_w(v4i32 _1, int _2) { return __lsx_vreplve_w(_1, _2); } ++// CHECK-LABEL: @vreplve_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vreplve.d(<2 x i64> _1:%.*, i32 _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vreplve_d(v2i64 _1, int _2) { return __lsx_vreplve_d(_1, _2); } ++// CHECK-LABEL: @vreplvei_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vreplvei.b(<16 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vreplvei_b(v16i8 _1) { return __lsx_vreplvei_b(_1, 1); } ++// CHECK-LABEL: @vreplvei_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vreplvei.h(<8 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vreplvei_h(v8i16 _1) { return __lsx_vreplvei_h(_1, 1); } ++// CHECK-LABEL: @vreplvei_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vreplvei.w(<4 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vreplvei_w(v4i32 _1) { return __lsx_vreplvei_w(_1, 1); } ++// CHECK-LABEL: @vreplvei_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vreplvei.d(<2 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vreplvei_d(v2i64 _1) { return __lsx_vreplvei_d(_1, 1); } ++// CHECK-LABEL: @vpickev_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vpickev.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vpickev_b(v16i8 _1, v16i8 _2) { return __lsx_vpickev_b(_1, _2); } ++// 
CHECK-LABEL: @vpickev_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vpickev.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vpickev_h(v8i16 _1, v8i16 _2) { return __lsx_vpickev_h(_1, _2); } ++// CHECK-LABEL: @vpickev_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vpickev.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vpickev_w(v4i32 _1, v4i32 _2) { return __lsx_vpickev_w(_1, _2); } ++// CHECK-LABEL: @vpickev_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vpickev.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vpickev_d(v2i64 _1, v2i64 _2) { return __lsx_vpickev_d(_1, _2); } ++// CHECK-LABEL: @vpickod_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vpickod.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vpickod_b(v16i8 _1, v16i8 _2) { return __lsx_vpickod_b(_1, _2); } ++// CHECK-LABEL: @vpickod_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vpickod.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vpickod_h(v8i16 _1, v8i16 _2) { return __lsx_vpickod_h(_1, _2); } ++// CHECK-LABEL: @vpickod_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vpickod.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vpickod_w(v4i32 _1, v4i32 _2) { return __lsx_vpickod_w(_1, _2); } ++// CHECK-LABEL: @vpickod_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vpickod.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vpickod_d(v2i64 _1, v2i64 _2) { return __lsx_vpickod_d(_1, _2); } ++// CHECK-LABEL: @vilvh_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vilvh.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vilvh_b(v16i8 _1, v16i8 _2) { return __lsx_vilvh_b(_1, _2); } ++// CHECK-LABEL: @vilvh_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vilvh.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vilvh_h(v8i16 _1, v8i16 _2) { return __lsx_vilvh_h(_1, _2); } ++// CHECK-LABEL: @vilvh_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vilvh.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vilvh_w(v4i32 _1, v4i32 _2) { return __lsx_vilvh_w(_1, _2); } ++// CHECK-LABEL: @vilvh_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vilvh.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vilvh_d(v2i64 _1, v2i64 _2) { return __lsx_vilvh_d(_1, _2); } ++// CHECK-LABEL: @vilvl_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vilvl.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vilvl_b(v16i8 _1, v16i8 _2) { return __lsx_vilvl_b(_1, _2); } ++// CHECK-LABEL: @vilvl_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vilvl.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 
++// ++v8i16 vilvl_h(v8i16 _1, v8i16 _2) { return __lsx_vilvl_h(_1, _2); } ++// CHECK-LABEL: @vilvl_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vilvl.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vilvl_w(v4i32 _1, v4i32 _2) { return __lsx_vilvl_w(_1, _2); } ++// CHECK-LABEL: @vilvl_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vilvl.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vilvl_d(v2i64 _1, v2i64 _2) { return __lsx_vilvl_d(_1, _2); } ++// CHECK-LABEL: @vpackev_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vpackev.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vpackev_b(v16i8 _1, v16i8 _2) { return __lsx_vpackev_b(_1, _2); } ++// CHECK-LABEL: @vpackev_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vpackev.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vpackev_h(v8i16 _1, v8i16 _2) { return __lsx_vpackev_h(_1, _2); } ++// CHECK-LABEL: @vpackev_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vpackev.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vpackev_w(v4i32 _1, v4i32 _2) { return __lsx_vpackev_w(_1, _2); } ++// CHECK-LABEL: @vpackev_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vpackev.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vpackev_d(v2i64 _1, v2i64 _2) { return __lsx_vpackev_d(_1, _2); } ++// CHECK-LABEL: @vpackod_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vpackod.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vpackod_b(v16i8 _1, v16i8 _2) { return __lsx_vpackod_b(_1, _2); } ++// CHECK-LABEL: @vpackod_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vpackod.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vpackod_h(v8i16 _1, v8i16 _2) { return __lsx_vpackod_h(_1, _2); } ++// CHECK-LABEL: @vpackod_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vpackod.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vpackod_w(v4i32 _1, v4i32 _2) { return __lsx_vpackod_w(_1, _2); } ++// CHECK-LABEL: @vpackod_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vpackod.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vpackod_d(v2i64 _1, v2i64 _2) { return __lsx_vpackod_d(_1, _2); } ++// CHECK-LABEL: @vshuf_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vshuf.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*, <8 x i16> _3:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vshuf_h(v8i16 _1, v8i16 _2, v8i16 _3) { ++ return __lsx_vshuf_h(_1, _2, _3); ++} ++// CHECK-LABEL: @vshuf_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vshuf.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*, <4 x i32> _3:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vshuf_w(v4i32 _1, v4i32 _2, v4i32 _3) { ++ return __lsx_vshuf_w(_1, _2, _3); ++} ++// CHECK-LABEL: @vshuf_d( ++// 
CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vshuf.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*, <2 x i64> _3:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vshuf_d(v2i64 _1, v2i64 _2, v2i64 _3) { ++ return __lsx_vshuf_d(_1, _2, _3); ++} ++// CHECK-LABEL: @vand_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vand.v(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16u8 vand_v(v16u8 _1, v16u8 _2) { return __lsx_vand_v(_1, _2); } ++// CHECK-LABEL: @vandi_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vandi.b(<16 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16u8 vandi_b(v16u8 _1) { return __lsx_vandi_b(_1, 1); } ++// CHECK-LABEL: @vor_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vor.v(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16u8 vor_v(v16u8 _1, v16u8 _2) { return __lsx_vor_v(_1, _2); } ++// CHECK-LABEL: @vori_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vori.b(<16 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16u8 vori_b(v16u8 _1) { return __lsx_vori_b(_1, 1); } ++// CHECK-LABEL: @vnor_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vnor.v(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16u8 vnor_v(v16u8 _1, v16u8 _2) { return __lsx_vnor_v(_1, _2); } ++// CHECK-LABEL: @vnori_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vnori.b(<16 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16u8 vnori_b(v16u8 _1) { return __lsx_vnori_b(_1, 1); } ++// CHECK-LABEL: @vxor_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vxor.v(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16u8 vxor_v(v16u8 _1, v16u8 _2) { return __lsx_vxor_v(_1, _2); } ++// CHECK-LABEL: @vxori_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vxori.b(<16 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16u8 vxori_b(v16u8 _1) { return __lsx_vxori_b(_1, 1); } ++// CHECK-LABEL: @vbitsel_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vbitsel.v(<16 x i8> _1:%.*, <16 x i8> _2:%.*, <16 x i8> _3:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16u8 vbitsel_v(v16u8 _1, v16u8 _2, v16u8 _3) { ++ return __lsx_vbitsel_v(_1, _2, _3); ++} ++// CHECK-LABEL: @vbitseli_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vbitseli.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16u8 vbitseli_b(v16u8 _1, v16u8 _2) { return __lsx_vbitseli_b(_1, _2, 1); } ++// CHECK-LABEL: @vshuf4i_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vshuf4i.b(<16 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vshuf4i_b(v16i8 _1) { return __lsx_vshuf4i_b(_1, 1); } ++// CHECK-LABEL: @vshuf4i_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vshuf4i.h(<8 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vshuf4i_h(v8i16 _1) { return __lsx_vshuf4i_h(_1, 1); } ++// CHECK-LABEL: @vshuf4i_w( ++// CHECK-NEXT: 
entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vshuf4i.w(<4 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vshuf4i_w(v4i32 _1) { return __lsx_vshuf4i_w(_1, 1); } ++// CHECK-LABEL: @vreplgr2vr_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vreplgr2vr.b(i32 _1:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vreplgr2vr_b(int _1) { return __lsx_vreplgr2vr_b(_1); } ++// CHECK-LABEL: @vreplgr2vr_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vreplgr2vr.h(i32 _1:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vreplgr2vr_h(int _1) { return __lsx_vreplgr2vr_h(_1); } ++// CHECK-LABEL: @vreplgr2vr_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vreplgr2vr.w(i32 _1:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vreplgr2vr_w(int _1) { return __lsx_vreplgr2vr_w(_1); } ++// CHECK-LABEL: @vreplgr2vr_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vreplgr2vr.d(i64 _1:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vreplgr2vr_d(long _1) { return __lsx_vreplgr2vr_d(_1); } ++// CHECK-LABEL: @vpcnt_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vpcnt.b(<16 x i8> _1:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vpcnt_b(v16i8 _1) { return __lsx_vpcnt_b(_1); } ++// CHECK-LABEL: @vpcnt_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vpcnt.h(<8 x i16> _1:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vpcnt_h(v8i16 _1) { return __lsx_vpcnt_h(_1); } ++// CHECK-LABEL: @vpcnt_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vpcnt.w(<4 x i32> _1:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vpcnt_w(v4i32 _1) { return __lsx_vpcnt_w(_1); } ++// CHECK-LABEL: @vpcnt_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vpcnt.d(<2 x i64> _1:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vpcnt_d(v2i64 _1) { return __lsx_vpcnt_d(_1); } ++// CHECK-LABEL: @vclo_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vclo.b(<16 x i8> _1:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vclo_b(v16i8 _1) { return __lsx_vclo_b(_1); } ++// CHECK-LABEL: @vclo_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vclo.h(<8 x i16> _1:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vclo_h(v8i16 _1) { return __lsx_vclo_h(_1); } ++// CHECK-LABEL: @vclo_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vclo.w(<4 x i32> _1:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vclo_w(v4i32 _1) { return __lsx_vclo_w(_1); } ++// CHECK-LABEL: @vclo_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vclo.d(<2 x i64> _1:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vclo_d(v2i64 _1) { return __lsx_vclo_d(_1); } ++// CHECK-LABEL: @vclz_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vclz.b(<16 x i8> _1:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vclz_b(v16i8 _1) { return __lsx_vclz_b(_1); } ++// CHECK-LABEL: @vclz_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vclz.h(<8 x i16> _1:%.*) ++// 
CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vclz_h(v8i16 _1) { return __lsx_vclz_h(_1); } ++// CHECK-LABEL: @vclz_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vclz.w(<4 x i32> _1:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vclz_w(v4i32 _1) { return __lsx_vclz_w(_1); } ++// CHECK-LABEL: @vclz_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vclz.d(<2 x i64> _1:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vclz_d(v2i64 _1) { return __lsx_vclz_d(_1); } ++// CHECK-LABEL: @vpickve2gr_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret i32 TMP0 ++// ++int vpickve2gr_b(v16i8 _1) { return __lsx_vpickve2gr_b(_1, 1); } ++// CHECK-LABEL: @vpickve2gr_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lsx.vpickve2gr.h(<8 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret i32 TMP0 ++// ++int vpickve2gr_h(v8i16 _1) { return __lsx_vpickve2gr_h(_1, 1); } ++// CHECK-LABEL: @vpickve2gr_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lsx.vpickve2gr.w(<4 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret i32 TMP0 ++// ++int vpickve2gr_w(v4i32 _1) { return __lsx_vpickve2gr_w(_1, 1); } ++// CHECK-LABEL: @vpickve2gr_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret i64 TMP0 ++// ++long vpickve2gr_d(v2i64 _1) { return __lsx_vpickve2gr_d(_1, 1); } ++// CHECK-LABEL: @vpickve2gr_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lsx.vpickve2gr.bu(<16 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret i32 TMP0 ++// ++unsigned int vpickve2gr_bu(v16i8 _1) { return __lsx_vpickve2gr_bu(_1, 1); } ++// CHECK-LABEL: @vpickve2gr_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lsx.vpickve2gr.hu(<8 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret i32 TMP0 ++// ++unsigned int vpickve2gr_hu(v8i16 _1) { return __lsx_vpickve2gr_hu(_1, 1); } ++// CHECK-LABEL: @vpickve2gr_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lsx.vpickve2gr.wu(<4 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret i32 TMP0 ++// ++unsigned int vpickve2gr_wu(v4i32 _1) { return __lsx_vpickve2gr_wu(_1, 1); } ++// CHECK-LABEL: @vpickve2gr_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret i64 TMP0 ++// ++unsigned long int vpickve2gr_du(v2i64 _1) { return __lsx_vpickve2gr_du(_1, 1); } ++// CHECK-LABEL: @vinsgr2vr_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b(<16 x i8> _1:%.*, i32 1, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vinsgr2vr_b(v16i8 _1) { return __lsx_vinsgr2vr_b(_1, 1, 1); } ++// CHECK-LABEL: @vinsgr2vr_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vinsgr2vr.h(<8 x i16> _1:%.*, i32 1, i32 1) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vinsgr2vr_h(v8i16 _1) { return __lsx_vinsgr2vr_h(_1, 1, 1); } ++// CHECK-LABEL: @vinsgr2vr_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vinsgr2vr.w(<4 x i32> _1:%.*, i32 1, i32 1) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vinsgr2vr_w(v4i32 _1) { return __lsx_vinsgr2vr_w(_1, 1, 1); } ++// CHECK-LABEL: 
@vinsgr2vr_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d(<2 x i64> _1:%.*, i64 1, i32 1) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vinsgr2vr_d(v2i64 _1) { return __lsx_vinsgr2vr_d(_1, 1, 1); } ++// CHECK-LABEL: @vfadd_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfadd.s(<4 x float> _1:%.*, <4 x float> _2:%.*) ++// CHECK-NEXT: ret <4 x float> TMP0 ++// ++v4f32 vfadd_s(v4f32 _1, v4f32 _2) { return __lsx_vfadd_s(_1, _2); } ++// CHECK-LABEL: @vfadd_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfadd.d(<2 x double> _1:%.*, <2 x double> _2:%.*) ++// CHECK-NEXT: ret <2 x double> TMP0 ++// ++v2f64 vfadd_d(v2f64 _1, v2f64 _2) { return __lsx_vfadd_d(_1, _2); } ++// CHECK-LABEL: @vfsub_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfsub.s(<4 x float> _1:%.*, <4 x float> _2:%.*) ++// CHECK-NEXT: ret <4 x float> TMP0 ++// ++v4f32 vfsub_s(v4f32 _1, v4f32 _2) { return __lsx_vfsub_s(_1, _2); } ++// CHECK-LABEL: @vfsub_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfsub.d(<2 x double> _1:%.*, <2 x double> _2:%.*) ++// CHECK-NEXT: ret <2 x double> TMP0 ++// ++v2f64 vfsub_d(v2f64 _1, v2f64 _2) { return __lsx_vfsub_d(_1, _2); } ++// CHECK-LABEL: @vfmul_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfmul.s(<4 x float> _1:%.*, <4 x float> _2:%.*) ++// CHECK-NEXT: ret <4 x float> TMP0 ++// ++v4f32 vfmul_s(v4f32 _1, v4f32 _2) { return __lsx_vfmul_s(_1, _2); } ++// CHECK-LABEL: @vfmul_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfmul.d(<2 x double> _1:%.*, <2 x double> _2:%.*) ++// CHECK-NEXT: ret <2 x double> TMP0 ++// ++v2f64 vfmul_d(v2f64 _1, v2f64 _2) { return __lsx_vfmul_d(_1, _2); } ++// CHECK-LABEL: @vfdiv_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfdiv.s(<4 x float> _1:%.*, <4 x float> _2:%.*) ++// CHECK-NEXT: ret <4 x float> TMP0 ++// ++v4f32 vfdiv_s(v4f32 _1, v4f32 _2) { return __lsx_vfdiv_s(_1, _2); } ++// CHECK-LABEL: @vfdiv_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfdiv.d(<2 x double> _1:%.*, <2 x double> _2:%.*) ++// CHECK-NEXT: ret <2 x double> TMP0 ++// ++v2f64 vfdiv_d(v2f64 _1, v2f64 _2) { return __lsx_vfdiv_d(_1, _2); } ++// CHECK-LABEL: @vfcvt_h_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vfcvt.h.s(<4 x float> _1:%.*, <4 x float> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vfcvt_h_s(v4f32 _1, v4f32 _2) { return __lsx_vfcvt_h_s(_1, _2); } ++// CHECK-LABEL: @vfcvt_s_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfcvt.s.d(<2 x double> _1:%.*, <2 x double> _2:%.*) ++// CHECK-NEXT: ret <4 x float> TMP0 ++// ++v4f32 vfcvt_s_d(v2f64 _1, v2f64 _2) { return __lsx_vfcvt_s_d(_1, _2); } ++// CHECK-LABEL: @vfmin_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfmin.s(<4 x float> _1:%.*, <4 x float> _2:%.*) ++// CHECK-NEXT: ret <4 x float> TMP0 ++// ++v4f32 vfmin_s(v4f32 _1, v4f32 _2) { return __lsx_vfmin_s(_1, _2); } ++// CHECK-LABEL: @vfmin_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfmin.d(<2 x double> 
_1:%.*, <2 x double> _2:%.*) ++// CHECK-NEXT: ret <2 x double> TMP0 ++// ++v2f64 vfmin_d(v2f64 _1, v2f64 _2) { return __lsx_vfmin_d(_1, _2); } ++// CHECK-LABEL: @vfmina_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfmina.s(<4 x float> _1:%.*, <4 x float> _2:%.*) ++// CHECK-NEXT: ret <4 x float> TMP0 ++// ++v4f32 vfmina_s(v4f32 _1, v4f32 _2) { return __lsx_vfmina_s(_1, _2); } ++// CHECK-LABEL: @vfmina_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfmina.d(<2 x double> _1:%.*, <2 x double> _2:%.*) ++// CHECK-NEXT: ret <2 x double> TMP0 ++// ++v2f64 vfmina_d(v2f64 _1, v2f64 _2) { return __lsx_vfmina_d(_1, _2); } ++// CHECK-LABEL: @vfmax_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfmax.s(<4 x float> _1:%.*, <4 x float> _2:%.*) ++// CHECK-NEXT: ret <4 x float> TMP0 ++// ++v4f32 vfmax_s(v4f32 _1, v4f32 _2) { return __lsx_vfmax_s(_1, _2); } ++// CHECK-LABEL: @vfmax_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfmax.d(<2 x double> _1:%.*, <2 x double> _2:%.*) ++// CHECK-NEXT: ret <2 x double> TMP0 ++// ++v2f64 vfmax_d(v2f64 _1, v2f64 _2) { return __lsx_vfmax_d(_1, _2); } ++// CHECK-LABEL: @vfmaxa_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfmaxa.s(<4 x float> _1:%.*, <4 x float> _2:%.*) ++// CHECK-NEXT: ret <4 x float> TMP0 ++// ++v4f32 vfmaxa_s(v4f32 _1, v4f32 _2) { return __lsx_vfmaxa_s(_1, _2); } ++// CHECK-LABEL: @vfmaxa_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfmaxa.d(<2 x double> _1:%.*, <2 x double> _2:%.*) ++// CHECK-NEXT: ret <2 x double> TMP0 ++// ++v2f64 vfmaxa_d(v2f64 _1, v2f64 _2) { return __lsx_vfmaxa_d(_1, _2); } ++// CHECK-LABEL: @vfclass_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfclass.s(<4 x float> _1:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vfclass_s(v4f32 _1) { return __lsx_vfclass_s(_1); } ++// CHECK-LABEL: @vfclass_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfclass.d(<2 x double> _1:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vfclass_d(v2f64 _1) { return __lsx_vfclass_d(_1); } ++// CHECK-LABEL: @vfsqrt_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfsqrt.s(<4 x float> _1:%.*) ++// CHECK-NEXT: ret <4 x float> TMP0 ++// ++v4f32 vfsqrt_s(v4f32 _1) { return __lsx_vfsqrt_s(_1); } ++// CHECK-LABEL: @vfsqrt_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfsqrt.d(<2 x double> _1:%.*) ++// CHECK-NEXT: ret <2 x double> TMP0 ++// ++v2f64 vfsqrt_d(v2f64 _1) { return __lsx_vfsqrt_d(_1); } ++// CHECK-LABEL: @vfrecip_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfrecip.s(<4 x float> _1:%.*) ++// CHECK-NEXT: ret <4 x float> TMP0 ++// ++v4f32 vfrecip_s(v4f32 _1) { return __lsx_vfrecip_s(_1); } ++// CHECK-LABEL: @vfrecip_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfrecip.d(<2 x double> _1:%.*) ++// CHECK-NEXT: ret <2 x double> TMP0 ++// ++v2f64 vfrecip_d(v2f64 _1) { return __lsx_vfrecip_d(_1); } ++// CHECK-LABEL: @vfrint_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfrint.s(<4 x float> _1:%.*) 
++// CHECK-NEXT: ret <4 x float> TMP0 ++// ++v4f32 vfrint_s(v4f32 _1) { return __lsx_vfrint_s(_1); } ++// CHECK-LABEL: @vfrint_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfrint.d(<2 x double> _1:%.*) ++// CHECK-NEXT: ret <2 x double> TMP0 ++// ++v2f64 vfrint_d(v2f64 _1) { return __lsx_vfrint_d(_1); } ++// CHECK-LABEL: @vfrsqrt_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfrsqrt.s(<4 x float> _1:%.*) ++// CHECK-NEXT: ret <4 x float> TMP0 ++// ++v4f32 vfrsqrt_s(v4f32 _1) { return __lsx_vfrsqrt_s(_1); } ++// CHECK-LABEL: @vfrsqrt_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfrsqrt.d(<2 x double> _1:%.*) ++// CHECK-NEXT: ret <2 x double> TMP0 ++// ++v2f64 vfrsqrt_d(v2f64 _1) { return __lsx_vfrsqrt_d(_1); } ++// CHECK-LABEL: @vflogb_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vflogb.s(<4 x float> _1:%.*) ++// CHECK-NEXT: ret <4 x float> TMP0 ++// ++v4f32 vflogb_s(v4f32 _1) { return __lsx_vflogb_s(_1); } ++// CHECK-LABEL: @vflogb_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vflogb.d(<2 x double> _1:%.*) ++// CHECK-NEXT: ret <2 x double> TMP0 ++// ++v2f64 vflogb_d(v2f64 _1) { return __lsx_vflogb_d(_1); } ++// CHECK-LABEL: @vfcvth_s_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfcvth.s.h(<8 x i16> _1:%.*) ++// CHECK-NEXT: ret <4 x float> TMP0 ++// ++v4f32 vfcvth_s_h(v8i16 _1) { return __lsx_vfcvth_s_h(_1); } ++// CHECK-LABEL: @vfcvth_d_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfcvth.d.s(<4 x float> _1:%.*) ++// CHECK-NEXT: ret <2 x double> TMP0 ++// ++v2f64 vfcvth_d_s(v4f32 _1) { return __lsx_vfcvth_d_s(_1); } ++// CHECK-LABEL: @vfcvtl_s_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfcvtl.s.h(<8 x i16> _1:%.*) ++// CHECK-NEXT: ret <4 x float> TMP0 ++// ++v4f32 vfcvtl_s_h(v8i16 _1) { return __lsx_vfcvtl_s_h(_1); } ++// CHECK-LABEL: @vfcvtl_d_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfcvtl.d.s(<4 x float> _1:%.*) ++// CHECK-NEXT: ret <2 x double> TMP0 ++// ++v2f64 vfcvtl_d_s(v4f32 _1) { return __lsx_vfcvtl_d_s(_1); } ++// CHECK-LABEL: @vftint_w_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vftint.w.s(<4 x float> _1:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vftint_w_s(v4f32 _1) { return __lsx_vftint_w_s(_1); } ++// CHECK-LABEL: @vftint_l_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftint.l.d(<2 x double> _1:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vftint_l_d(v2f64 _1) { return __lsx_vftint_l_d(_1); } ++// CHECK-LABEL: @vftint_wu_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vftint.wu.s(<4 x float> _1:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4u32 vftint_wu_s(v4f32 _1) { return __lsx_vftint_wu_s(_1); } ++// CHECK-LABEL: @vftint_lu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftint.lu.d(<2 x double> _1:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2u64 vftint_lu_d(v2f64 _1) { return __lsx_vftint_lu_d(_1); } ++// CHECK-LABEL: @vftintrz_w_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail 
call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.s(<4 x float> _1:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vftintrz_w_s(v4f32 _1) { return __lsx_vftintrz_w_s(_1); } ++// CHECK-LABEL: @vftintrz_l_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftintrz.l.d(<2 x double> _1:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vftintrz_l_d(v2f64 _1) { return __lsx_vftintrz_l_d(_1); } ++// CHECK-LABEL: @vftintrz_wu_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.wu.s(<4 x float> _1:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4u32 vftintrz_wu_s(v4f32 _1) { return __lsx_vftintrz_wu_s(_1); } ++// CHECK-LABEL: @vftintrz_lu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftintrz.lu.d(<2 x double> _1:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2u64 vftintrz_lu_d(v2f64 _1) { return __lsx_vftintrz_lu_d(_1); } ++// CHECK-LABEL: @vffint_s_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.w(<4 x i32> _1:%.*) ++// CHECK-NEXT: ret <4 x float> TMP0 ++// ++v4f32 vffint_s_w(v4i32 _1) { return __lsx_vffint_s_w(_1); } ++// CHECK-LABEL: @vffint_d_l( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vffint.d.l(<2 x i64> _1:%.*) ++// CHECK-NEXT: ret <2 x double> TMP0 ++// ++v2f64 vffint_d_l(v2i64 _1) { return __lsx_vffint_d_l(_1); } ++// CHECK-LABEL: @vffint_s_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.wu(<4 x i32> _1:%.*) ++// CHECK-NEXT: ret <4 x float> TMP0 ++// ++v4f32 vffint_s_wu(v4u32 _1) { return __lsx_vffint_s_wu(_1); } ++// CHECK-LABEL: @vffint_d_lu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vffint.d.lu(<2 x i64> _1:%.*) ++// CHECK-NEXT: ret <2 x double> TMP0 ++// ++v2f64 vffint_d_lu(v2u64 _1) { return __lsx_vffint_d_lu(_1); } ++// CHECK-LABEL: @vandn_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vandn.v(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16u8 vandn_v(v16u8 _1, v16u8 _2) { return __lsx_vandn_v(_1, _2); } ++// CHECK-LABEL: @vneg_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vneg.b(<16 x i8> _1:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vneg_b(v16i8 _1) { return __lsx_vneg_b(_1); } ++// CHECK-LABEL: @vneg_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vneg.h(<8 x i16> _1:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vneg_h(v8i16 _1) { return __lsx_vneg_h(_1); } ++// CHECK-LABEL: @vneg_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vneg.w(<4 x i32> _1:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vneg_w(v4i32 _1) { return __lsx_vneg_w(_1); } ++// CHECK-LABEL: @vneg_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vneg.d(<2 x i64> _1:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vneg_d(v2i64 _1) { return __lsx_vneg_d(_1); } ++// CHECK-LABEL: @vmuh_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmuh.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vmuh_b(v16i8 _1, v16i8 _2) { return __lsx_vmuh_b(_1, _2); } ++// CHECK-LABEL: 
@vmuh_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmuh.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vmuh_h(v8i16 _1, v8i16 _2) { return __lsx_vmuh_h(_1, _2); } ++// CHECK-LABEL: @vmuh_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmuh.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vmuh_w(v4i32 _1, v4i32 _2) { return __lsx_vmuh_w(_1, _2); } ++// CHECK-LABEL: @vmuh_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmuh.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vmuh_d(v2i64 _1, v2i64 _2) { return __lsx_vmuh_d(_1, _2); } ++// CHECK-LABEL: @vmuh_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmuh.bu(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16u8 vmuh_bu(v16u8 _1, v16u8 _2) { return __lsx_vmuh_bu(_1, _2); } ++// CHECK-LABEL: @vmuh_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmuh.hu(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8u16 vmuh_hu(v8u16 _1, v8u16 _2) { return __lsx_vmuh_hu(_1, _2); } ++// CHECK-LABEL: @vmuh_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmuh.wu(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4u32 vmuh_wu(v4u32 _1, v4u32 _2) { return __lsx_vmuh_wu(_1, _2); } ++// CHECK-LABEL: @vmuh_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmuh.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2u64 vmuh_du(v2u64 _1, v2u64 _2) { return __lsx_vmuh_du(_1, _2); } ++// CHECK-LABEL: @vsllwil_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b(<16 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vsllwil_h_b(v16i8 _1) { return __lsx_vsllwil_h_b(_1, 1); } ++// CHECK-LABEL: @vsllwil_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsllwil.w.h(<8 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vsllwil_w_h(v8i16 _1) { return __lsx_vsllwil_w_h(_1, 1); } ++// CHECK-LABEL: @vsllwil_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsllwil.d.w(<4 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vsllwil_d_w(v4i32 _1) { return __lsx_vsllwil_d_w(_1, 1); } ++// CHECK-LABEL: @vsllwil_hu_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsllwil.hu.bu(<16 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8u16 vsllwil_hu_bu(v16u8 _1) { return __lsx_vsllwil_hu_bu(_1, 1); } ++// CHECK-LABEL: @vsllwil_wu_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsllwil.wu.hu(<8 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4u32 vsllwil_wu_hu(v8u16 _1) { return __lsx_vsllwil_wu_hu(_1, 1); } ++// CHECK-LABEL: @vsllwil_du_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsllwil.du.wu(<4 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2u64 vsllwil_du_wu(v4u32 _1) { return __lsx_vsllwil_du_wu(_1, 1); } ++// CHECK-LABEL: 
@vsran_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsran.b.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vsran_b_h(v8i16 _1, v8i16 _2) { return __lsx_vsran_b_h(_1, _2); } ++// CHECK-LABEL: @vsran_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsran.h.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vsran_h_w(v4i32 _1, v4i32 _2) { return __lsx_vsran_h_w(_1, _2); } ++// CHECK-LABEL: @vsran_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsran.w.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vsran_w_d(v2i64 _1, v2i64 _2) { return __lsx_vsran_w_d(_1, _2); } ++// CHECK-LABEL: @vssran_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssran.b.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vssran_b_h(v8i16 _1, v8i16 _2) { return __lsx_vssran_b_h(_1, _2); } ++// CHECK-LABEL: @vssran_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssran.h.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vssran_h_w(v4i32 _1, v4i32 _2) { return __lsx_vssran_h_w(_1, _2); } ++// CHECK-LABEL: @vssran_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssran.w.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vssran_w_d(v2i64 _1, v2i64 _2) { return __lsx_vssran_w_d(_1, _2); } ++// CHECK-LABEL: @vssran_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssran.bu.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16u8 vssran_bu_h(v8u16 _1, v8u16 _2) { return __lsx_vssran_bu_h(_1, _2); } ++// CHECK-LABEL: @vssran_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssran.hu.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8u16 vssran_hu_w(v4u32 _1, v4u32 _2) { return __lsx_vssran_hu_w(_1, _2); } ++// CHECK-LABEL: @vssran_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssran.wu.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4u32 vssran_wu_d(v2u64 _1, v2u64 _2) { return __lsx_vssran_wu_d(_1, _2); } ++// CHECK-LABEL: @vsrarn_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsrarn.b.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vsrarn_b_h(v8i16 _1, v8i16 _2) { return __lsx_vsrarn_b_h(_1, _2); } ++// CHECK-LABEL: @vsrarn_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsrarn.h.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vsrarn_h_w(v4i32 _1, v4i32 _2) { return __lsx_vsrarn_h_w(_1, _2); } ++// CHECK-LABEL: @vsrarn_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsrarn.w.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vsrarn_w_d(v2i64 _1, v2i64 _2) { return __lsx_vsrarn_w_d(_1, _2); } ++// CHECK-LABEL: @vssrarn_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> 
@llvm.loongarch.lsx.vssrarn.b.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vssrarn_b_h(v8i16 _1, v8i16 _2) { return __lsx_vssrarn_b_h(_1, _2); } ++// CHECK-LABEL: @vssrarn_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssrarn.h.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vssrarn_h_w(v4i32 _1, v4i32 _2) { return __lsx_vssrarn_h_w(_1, _2); } ++// CHECK-LABEL: @vssrarn_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssrarn.w.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vssrarn_w_d(v2i64 _1, v2i64 _2) { return __lsx_vssrarn_w_d(_1, _2); } ++// CHECK-LABEL: @vssrarn_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssrarn.bu.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16u8 vssrarn_bu_h(v8u16 _1, v8u16 _2) { return __lsx_vssrarn_bu_h(_1, _2); } ++// CHECK-LABEL: @vssrarn_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssrarn.hu.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8u16 vssrarn_hu_w(v4u32 _1, v4u32 _2) { return __lsx_vssrarn_hu_w(_1, _2); } ++// CHECK-LABEL: @vssrarn_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssrarn.wu.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4u32 vssrarn_wu_d(v2u64 _1, v2u64 _2) { return __lsx_vssrarn_wu_d(_1, _2); } ++// CHECK-LABEL: @vsrln_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsrln.b.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vsrln_b_h(v8i16 _1, v8i16 _2) { return __lsx_vsrln_b_h(_1, _2); } ++// CHECK-LABEL: @vsrln_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsrln.h.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vsrln_h_w(v4i32 _1, v4i32 _2) { return __lsx_vsrln_h_w(_1, _2); } ++// CHECK-LABEL: @vsrln_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsrln.w.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vsrln_w_d(v2i64 _1, v2i64 _2) { return __lsx_vsrln_w_d(_1, _2); } ++// CHECK-LABEL: @vssrln_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssrln.bu.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16u8 vssrln_bu_h(v8u16 _1, v8u16 _2) { return __lsx_vssrln_bu_h(_1, _2); } ++// CHECK-LABEL: @vssrln_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssrln.hu.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8u16 vssrln_hu_w(v4u32 _1, v4u32 _2) { return __lsx_vssrln_hu_w(_1, _2); } ++// CHECK-LABEL: @vssrln_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssrln.wu.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4u32 vssrln_wu_d(v2u64 _1, v2u64 _2) { return __lsx_vssrln_wu_d(_1, _2); } ++// CHECK-LABEL: @vsrlrn_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsrlrn.b.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// 
CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vsrlrn_b_h(v8i16 _1, v8i16 _2) { return __lsx_vsrlrn_b_h(_1, _2); } ++// CHECK-LABEL: @vsrlrn_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsrlrn.h.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vsrlrn_h_w(v4i32 _1, v4i32 _2) { return __lsx_vsrlrn_h_w(_1, _2); } ++// CHECK-LABEL: @vsrlrn_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsrlrn.w.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vsrlrn_w_d(v2i64 _1, v2i64 _2) { return __lsx_vsrlrn_w_d(_1, _2); } ++// CHECK-LABEL: @vssrlrn_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrn.bu.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16u8 vssrlrn_bu_h(v8u16 _1, v8u16 _2) { return __lsx_vssrlrn_bu_h(_1, _2); } ++// CHECK-LABEL: @vssrlrn_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrn.hu.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8u16 vssrlrn_hu_w(v4u32 _1, v4u32 _2) { return __lsx_vssrlrn_hu_w(_1, _2); } ++// CHECK-LABEL: @vssrlrn_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrn.wu.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4u32 vssrlrn_wu_d(v2u64 _1, v2u64 _2) { return __lsx_vssrlrn_wu_d(_1, _2); } ++// CHECK-LABEL: @vfrstpi_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vfrstpi.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vfrstpi_b(v16i8 _1, v16i8 _2) { return __lsx_vfrstpi_b(_1, _2, 1); } ++// CHECK-LABEL: @vfrstpi_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vfrstpi.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vfrstpi_h(v8i16 _1, v8i16 _2) { return __lsx_vfrstpi_h(_1, _2, 1); } ++// CHECK-LABEL: @vfrstp_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vfrstp.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*, <16 x i8> _3:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vfrstp_b(v16i8 _1, v16i8 _2, v16i8 _3) { ++ return __lsx_vfrstp_b(_1, _2, _3); ++} ++// CHECK-LABEL: @vfrstp_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vfrstp.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*, <8 x i16> _3:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vfrstp_h(v8i16 _1, v8i16 _2, v8i16 _3) { ++ return __lsx_vfrstp_h(_1, _2, _3); ++} ++// CHECK-LABEL: @vshuf4i_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vshuf4i.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*, i32 1) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vshuf4i_d(v2i64 _1, v2i64 _2) { return __lsx_vshuf4i_d(_1, _2, 1); } ++// CHECK-LABEL: @vbsrl_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vbsrl.v(<16 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vbsrl_v(v16i8 _1) { return __lsx_vbsrl_v(_1, 1); } ++// CHECK-LABEL: @vbsll_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vbsll.v(<16 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vbsll_v(v16i8 _1) 
{ return __lsx_vbsll_v(_1, 1); } ++// CHECK-LABEL: @vextrins_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vextrins.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vextrins_b(v16i8 _1, v16i8 _2) { return __lsx_vextrins_b(_1, _2, 1); } ++// CHECK-LABEL: @vextrins_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vextrins.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vextrins_h(v8i16 _1, v8i16 _2) { return __lsx_vextrins_h(_1, _2, 1); } ++// CHECK-LABEL: @vextrins_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vextrins.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vextrins_w(v4i32 _1, v4i32 _2) { return __lsx_vextrins_w(_1, _2, 1); } ++// CHECK-LABEL: @vextrins_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vextrins.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*, i32 1) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vextrins_d(v2i64 _1, v2i64 _2) { return __lsx_vextrins_d(_1, _2, 1); } ++// CHECK-LABEL: @vmskltz_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmskltz.b(<16 x i8> _1:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vmskltz_b(v16i8 _1) { return __lsx_vmskltz_b(_1); } ++// CHECK-LABEL: @vmskltz_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmskltz.h(<8 x i16> _1:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vmskltz_h(v8i16 _1) { return __lsx_vmskltz_h(_1); } ++// CHECK-LABEL: @vmskltz_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmskltz.w(<4 x i32> _1:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vmskltz_w(v4i32 _1) { return __lsx_vmskltz_w(_1); } ++// CHECK-LABEL: @vmskltz_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmskltz.d(<2 x i64> _1:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vmskltz_d(v2i64 _1) { return __lsx_vmskltz_d(_1); } ++// CHECK-LABEL: @vsigncov_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsigncov.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vsigncov_b(v16i8 _1, v16i8 _2) { return __lsx_vsigncov_b(_1, _2); } ++// CHECK-LABEL: @vsigncov_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsigncov.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vsigncov_h(v8i16 _1, v8i16 _2) { return __lsx_vsigncov_h(_1, _2); } ++// CHECK-LABEL: @vsigncov_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsigncov.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vsigncov_w(v4i32 _1, v4i32 _2) { return __lsx_vsigncov_w(_1, _2); } ++// CHECK-LABEL: @vsigncov_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsigncov.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vsigncov_d(v2i64 _1, v2i64 _2) { return __lsx_vsigncov_d(_1, _2); } ++// CHECK-LABEL: @vfmadd_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfmadd.s(<4 x float> _1:%.*, <4 x float> _2:%.*, <4 x 
float> _3:%.*) ++// CHECK-NEXT: ret <4 x float> TMP0 ++// ++v4f32 vfmadd_s(v4f32 _1, v4f32 _2, v4f32 _3) { ++ return __lsx_vfmadd_s(_1, _2, _3); ++} ++// CHECK-LABEL: @vfmadd_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfmadd.d(<2 x double> _1:%.*, <2 x double> _2:%.*, <2 x double> _3:%.*) ++// CHECK-NEXT: ret <2 x double> TMP0 ++// ++v2f64 vfmadd_d(v2f64 _1, v2f64 _2, v2f64 _3) { ++ return __lsx_vfmadd_d(_1, _2, _3); ++} ++// CHECK-LABEL: @vfmsub_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfmsub.s(<4 x float> _1:%.*, <4 x float> _2:%.*, <4 x float> _3:%.*) ++// CHECK-NEXT: ret <4 x float> TMP0 ++// ++v4f32 vfmsub_s(v4f32 _1, v4f32 _2, v4f32 _3) { ++ return __lsx_vfmsub_s(_1, _2, _3); ++} ++// CHECK-LABEL: @vfmsub_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfmsub.d(<2 x double> _1:%.*, <2 x double> _2:%.*, <2 x double> _3:%.*) ++// CHECK-NEXT: ret <2 x double> TMP0 ++// ++v2f64 vfmsub_d(v2f64 _1, v2f64 _2, v2f64 _3) { ++ return __lsx_vfmsub_d(_1, _2, _3); ++} ++// CHECK-LABEL: @vfnmadd_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfnmadd.s(<4 x float> _1:%.*, <4 x float> _2:%.*, <4 x float> _3:%.*) ++// CHECK-NEXT: ret <4 x float> TMP0 ++// ++v4f32 vfnmadd_s(v4f32 _1, v4f32 _2, v4f32 _3) { ++ return __lsx_vfnmadd_s(_1, _2, _3); ++} ++// CHECK-LABEL: @vfnmadd_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfnmadd.d(<2 x double> _1:%.*, <2 x double> _2:%.*, <2 x double> _3:%.*) ++// CHECK-NEXT: ret <2 x double> TMP0 ++// ++v2f64 vfnmadd_d(v2f64 _1, v2f64 _2, v2f64 _3) { ++ return __lsx_vfnmadd_d(_1, _2, _3); ++} ++// CHECK-LABEL: @vfnmsub_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfnmsub.s(<4 x float> _1:%.*, <4 x float> _2:%.*, <4 x float> _3:%.*) ++// CHECK-NEXT: ret <4 x float> TMP0 ++// ++v4f32 vfnmsub_s(v4f32 _1, v4f32 _2, v4f32 _3) { ++ return __lsx_vfnmsub_s(_1, _2, _3); ++} ++// CHECK-LABEL: @vfnmsub_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfnmsub.d(<2 x double> _1:%.*, <2 x double> _2:%.*, <2 x double> _3:%.*) ++// CHECK-NEXT: ret <2 x double> TMP0 ++// ++v2f64 vfnmsub_d(v2f64 _1, v2f64 _2, v2f64 _3) { ++ return __lsx_vfnmsub_d(_1, _2, _3); ++} ++// CHECK-LABEL: @vftintrne_w_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.s(<4 x float> _1:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vftintrne_w_s(v4f32 _1) { return __lsx_vftintrne_w_s(_1); } ++// CHECK-LABEL: @vftintrne_l_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftintrne.l.d(<2 x double> _1:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vftintrne_l_d(v2f64 _1) { return __lsx_vftintrne_l_d(_1); } ++// CHECK-LABEL: @vftintrp_w_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.s(<4 x float> _1:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vftintrp_w_s(v4f32 _1) { return __lsx_vftintrp_w_s(_1); } ++// CHECK-LABEL: @vftintrp_l_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftintrp.l.d(<2 x double> _1:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vftintrp_l_d(v2f64 _1) { return __lsx_vftintrp_l_d(_1); } ++// CHECK-LABEL: 
@vftintrm_w_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.s(<4 x float> _1:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vftintrm_w_s(v4f32 _1) { return __lsx_vftintrm_w_s(_1); } ++// CHECK-LABEL: @vftintrm_l_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftintrm.l.d(<2 x double> _1:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vftintrm_l_d(v2f64 _1) { return __lsx_vftintrm_l_d(_1); } ++// CHECK-LABEL: @vftint_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vftint.w.d(<2 x double> _1:%.*, <2 x double> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vftint_w_d(v2f64 _1, v2f64 _2) { return __lsx_vftint_w_d(_1, _2); } ++// CHECK-LABEL: @vffint_s_l( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.l(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x float> TMP0 ++// ++v4f32 vffint_s_l(v2i64 _1, v2i64 _2) { return __lsx_vffint_s_l(_1, _2); } ++// CHECK-LABEL: @vftintrz_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.d(<2 x double> _1:%.*, <2 x double> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vftintrz_w_d(v2f64 _1, v2f64 _2) { return __lsx_vftintrz_w_d(_1, _2); } ++// CHECK-LABEL: @vftintrp_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.d(<2 x double> _1:%.*, <2 x double> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vftintrp_w_d(v2f64 _1, v2f64 _2) { return __lsx_vftintrp_w_d(_1, _2); } ++// CHECK-LABEL: @vftintrm_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.d(<2 x double> _1:%.*, <2 x double> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vftintrm_w_d(v2f64 _1, v2f64 _2) { return __lsx_vftintrm_w_d(_1, _2); } ++// CHECK-LABEL: @vftintrne_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.d(<2 x double> _1:%.*, <2 x double> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vftintrne_w_d(v2f64 _1, v2f64 _2) { return __lsx_vftintrne_w_d(_1, _2); } ++// CHECK-LABEL: @vftintl_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftintl.l.s(<4 x float> _1:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vftintl_l_s(v4f32 _1) { return __lsx_vftintl_l_s(_1); } ++// CHECK-LABEL: @vftinth_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftinth.l.s(<4 x float> _1:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vftinth_l_s(v4f32 _1) { return __lsx_vftinth_l_s(_1); } ++// CHECK-LABEL: @vffinth_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vffinth.d.w(<4 x i32> _1:%.*) ++// CHECK-NEXT: ret <2 x double> TMP0 ++// ++v2f64 vffinth_d_w(v4i32 _1) { return __lsx_vffinth_d_w(_1); } ++// CHECK-LABEL: @vffintl_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vffintl.d.w(<4 x i32> _1:%.*) ++// CHECK-NEXT: ret <2 x double> TMP0 ++// ++v2f64 vffintl_d_w(v4i32 _1) { return __lsx_vffintl_d_w(_1); } ++// CHECK-LABEL: @vftintrzl_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftintrzl.l.s(<4 x float> _1:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 
++// ++v2i64 vftintrzl_l_s(v4f32 _1) { return __lsx_vftintrzl_l_s(_1); } ++// CHECK-LABEL: @vftintrzh_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftintrzh.l.s(<4 x float> _1:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vftintrzh_l_s(v4f32 _1) { return __lsx_vftintrzh_l_s(_1); } ++// CHECK-LABEL: @vftintrpl_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftintrpl.l.s(<4 x float> _1:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vftintrpl_l_s(v4f32 _1) { return __lsx_vftintrpl_l_s(_1); } ++// CHECK-LABEL: @vftintrph_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftintrph.l.s(<4 x float> _1:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vftintrph_l_s(v4f32 _1) { return __lsx_vftintrph_l_s(_1); } ++// CHECK-LABEL: @vftintrml_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftintrml.l.s(<4 x float> _1:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vftintrml_l_s(v4f32 _1) { return __lsx_vftintrml_l_s(_1); } ++// CHECK-LABEL: @vftintrmh_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftintrmh.l.s(<4 x float> _1:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vftintrmh_l_s(v4f32 _1) { return __lsx_vftintrmh_l_s(_1); } ++// CHECK-LABEL: @vftintrnel_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftintrnel.l.s(<4 x float> _1:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vftintrnel_l_s(v4f32 _1) { return __lsx_vftintrnel_l_s(_1); } ++// CHECK-LABEL: @vftintrneh_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftintrneh.l.s(<4 x float> _1:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vftintrneh_l_s(v4f32 _1) { return __lsx_vftintrneh_l_s(_1); } ++// CHECK-LABEL: @vfrintrne_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfrintrne.s(<4 x float> _1:%.*) ++// CHECK-NEXT: TMP1:%.* = bitcast <4 x float> TMP0 to <4 x i32> ++// CHECK-NEXT: ret <4 x i32> TMP1 ++// ++v4i32 vfrintrne_s(v4f32 _1) { return __lsx_vfrintrne_s(_1); } ++// CHECK-LABEL: @vfrintrne_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfrintrne.d(<2 x double> _1:%.*) ++// CHECK-NEXT: TMP1:%.* = bitcast <2 x double> TMP0 to <2 x i64> ++// CHECK-NEXT: ret <2 x i64> TMP1 ++// ++v2i64 vfrintrne_d(v2f64 _1) { return __lsx_vfrintrne_d(_1); } ++// CHECK-LABEL: @vfrintrz_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfrintrz.s(<4 x float> _1:%.*) ++// CHECK-NEXT: TMP1:%.* = bitcast <4 x float> TMP0 to <4 x i32> ++// CHECK-NEXT: ret <4 x i32> TMP1 ++// ++v4i32 vfrintrz_s(v4f32 _1) { return __lsx_vfrintrz_s(_1); } ++// CHECK-LABEL: @vfrintrz_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfrintrz.d(<2 x double> _1:%.*) ++// CHECK-NEXT: TMP1:%.* = bitcast <2 x double> TMP0 to <2 x i64> ++// CHECK-NEXT: ret <2 x i64> TMP1 ++// ++v2i64 vfrintrz_d(v2f64 _1) { return __lsx_vfrintrz_d(_1); } ++// CHECK-LABEL: @vfrintrp_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfrintrp.s(<4 x float> _1:%.*) ++// CHECK-NEXT: TMP1:%.* = bitcast <4 x float> TMP0 to <4 x i32> ++// CHECK-NEXT: ret <4 x i32> TMP1 ++// ++v4i32 
vfrintrp_s(v4f32 _1) { return __lsx_vfrintrp_s(_1); } ++// CHECK-LABEL: @vfrintrp_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfrintrp.d(<2 x double> _1:%.*) ++// CHECK-NEXT: TMP1:%.* = bitcast <2 x double> TMP0 to <2 x i64> ++// CHECK-NEXT: ret <2 x i64> TMP1 ++// ++v2i64 vfrintrp_d(v2f64 _1) { return __lsx_vfrintrp_d(_1); } ++// CHECK-LABEL: @vfrintrm_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfrintrm.s(<4 x float> _1:%.*) ++// CHECK-NEXT: TMP1:%.* = bitcast <4 x float> TMP0 to <4 x i32> ++// CHECK-NEXT: ret <4 x i32> TMP1 ++// ++v4i32 vfrintrm_s(v4f32 _1) { return __lsx_vfrintrm_s(_1); } ++// CHECK-LABEL: @vfrintrm_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfrintrm.d(<2 x double> _1:%.*) ++// CHECK-NEXT: TMP1:%.* = bitcast <2 x double> TMP0 to <2 x i64> ++// CHECK-NEXT: ret <2 x i64> TMP1 ++// ++v2i64 vfrintrm_d(v2f64 _1) { return __lsx_vfrintrm_d(_1); } ++// CHECK-LABEL: @vstelm_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.b(<16 x i8> _1:%.*, ptr _2:%.*, i32 1, i32 1) ++// CHECK-NEXT: ret void ++// ++void vstelm_b(v16i8 _1, void *_2) { return __lsx_vstelm_b(_1, _2, 1, 1); } ++// CHECK-LABEL: @vstelm_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.h(<8 x i16> _1:%.*, ptr _2:%.*, i32 2, i32 1) ++// CHECK-NEXT: ret void ++// ++void vstelm_h(v8i16 _1, void *_2) { return __lsx_vstelm_h(_1, _2, 2, 1); } ++// CHECK-LABEL: @vstelm_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.w(<4 x i32> _1:%.*, ptr _2:%.*, i32 4, i32 1) ++// CHECK-NEXT: ret void ++// ++void vstelm_w(v4i32 _1, void *_2) { return __lsx_vstelm_w(_1, _2, 4, 1); } ++// CHECK-LABEL: @vstelm_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.d(<2 x i64> _1:%.*, ptr _2:%.*, i32 8, i32 1) ++// CHECK-NEXT: ret void ++// ++void vstelm_d(v2i64 _1, void *_2) { return __lsx_vstelm_d(_1, _2, 8, 1); } ++// CHECK-LABEL: @vaddwev_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vaddwev_d_w(v4i32 _1, v4i32 _2) { return __lsx_vaddwev_d_w(_1, _2); } ++// CHECK-LABEL: @vaddwev_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vaddwev_w_h(v8i16 _1, v8i16 _2) { return __lsx_vaddwev_w_h(_1, _2); } ++// CHECK-LABEL: @vaddwev_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vaddwev_h_b(v16i8 _1, v16i8 _2) { return __lsx_vaddwev_h_b(_1, _2); } ++// CHECK-LABEL: @vaddwod_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vaddwod_d_w(v4i32 _1, v4i32 _2) { return __lsx_vaddwod_d_w(_1, _2); } ++// CHECK-LABEL: @vaddwod_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vaddwod_w_h(v8i16 _1, v8i16 _2) { return __lsx_vaddwod_w_h(_1, _2); } 
++// CHECK-LABEL: @vaddwod_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vaddwod_h_b(v16i8 _1, v16i8 _2) { return __lsx_vaddwod_h_b(_1, _2); } ++// CHECK-LABEL: @vaddwev_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vaddwev_d_wu(v4u32 _1, v4u32 _2) { return __lsx_vaddwev_d_wu(_1, _2); } ++// CHECK-LABEL: @vaddwev_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vaddwev_w_hu(v8u16 _1, v8u16 _2) { return __lsx_vaddwev_w_hu(_1, _2); } ++// CHECK-LABEL: @vaddwev_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vaddwev_h_bu(v16u8 _1, v16u8 _2) { return __lsx_vaddwev_h_bu(_1, _2); } ++// CHECK-LABEL: @vaddwod_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vaddwod_d_wu(v4u32 _1, v4u32 _2) { return __lsx_vaddwod_d_wu(_1, _2); } ++// CHECK-LABEL: @vaddwod_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vaddwod_w_hu(v8u16 _1, v8u16 _2) { return __lsx_vaddwod_w_hu(_1, _2); } ++// CHECK-LABEL: @vaddwod_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vaddwod_h_bu(v16u8 _1, v16u8 _2) { return __lsx_vaddwod_h_bu(_1, _2); } ++// CHECK-LABEL: @vaddwev_d_wu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vaddwev_d_wu_w(v4u32 _1, v4i32 _2) { ++ return __lsx_vaddwev_d_wu_w(_1, _2); ++} ++// CHECK-LABEL: @vaddwev_w_hu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vaddwev_w_hu_h(v8u16 _1, v8i16 _2) { ++ return __lsx_vaddwev_w_hu_h(_1, _2); ++} ++// CHECK-LABEL: @vaddwev_h_bu_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vaddwev_h_bu_b(v16u8 _1, v16i8 _2) { ++ return __lsx_vaddwev_h_bu_b(_1, _2); ++} ++// CHECK-LABEL: @vaddwod_d_wu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vaddwod_d_wu_w(v4u32 _1, v4i32 _2) { ++ return __lsx_vaddwod_d_wu_w(_1, _2); ++} ++// CHECK-LABEL: @vaddwod_w_hu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vaddwod_w_hu_h(v8u16 _1, v8i16 _2) 
{ ++ return __lsx_vaddwod_w_hu_h(_1, _2); ++} ++// CHECK-LABEL: @vaddwod_h_bu_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vaddwod_h_bu_b(v16u8 _1, v16i8 _2) { ++ return __lsx_vaddwod_h_bu_b(_1, _2); ++} ++// CHECK-LABEL: @vsubwev_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vsubwev_d_w(v4i32 _1, v4i32 _2) { return __lsx_vsubwev_d_w(_1, _2); } ++// CHECK-LABEL: @vsubwev_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vsubwev_w_h(v8i16 _1, v8i16 _2) { return __lsx_vsubwev_w_h(_1, _2); } ++// CHECK-LABEL: @vsubwev_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vsubwev_h_b(v16i8 _1, v16i8 _2) { return __lsx_vsubwev_h_b(_1, _2); } ++// CHECK-LABEL: @vsubwod_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vsubwod_d_w(v4i32 _1, v4i32 _2) { return __lsx_vsubwod_d_w(_1, _2); } ++// CHECK-LABEL: @vsubwod_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vsubwod_w_h(v8i16 _1, v8i16 _2) { return __lsx_vsubwod_w_h(_1, _2); } ++// CHECK-LABEL: @vsubwod_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vsubwod_h_b(v16i8 _1, v16i8 _2) { return __lsx_vsubwod_h_b(_1, _2); } ++// CHECK-LABEL: @vsubwev_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.wu(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vsubwev_d_wu(v4u32 _1, v4u32 _2) { return __lsx_vsubwev_d_wu(_1, _2); } ++// CHECK-LABEL: @vsubwev_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.hu(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vsubwev_w_hu(v8u16 _1, v8u16 _2) { return __lsx_vsubwev_w_hu(_1, _2); } ++// CHECK-LABEL: @vsubwev_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.bu(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vsubwev_h_bu(v16u8 _1, v16u8 _2) { return __lsx_vsubwev_h_bu(_1, _2); } ++// CHECK-LABEL: @vsubwod_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.wu(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vsubwod_d_wu(v4u32 _1, v4u32 _2) { return __lsx_vsubwod_d_wu(_1, _2); } ++// CHECK-LABEL: @vsubwod_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.hu(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vsubwod_w_hu(v8u16 _1, v8u16 _2) { return 
__lsx_vsubwod_w_hu(_1, _2); } ++// CHECK-LABEL: @vsubwod_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.bu(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vsubwod_h_bu(v16u8 _1, v16u8 _2) { return __lsx_vsubwod_h_bu(_1, _2); } ++// CHECK-LABEL: @vaddwev_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vaddwev_q_d(v2i64 _1, v2i64 _2) { return __lsx_vaddwev_q_d(_1, _2); } ++// CHECK-LABEL: @vaddwod_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vaddwod_q_d(v2i64 _1, v2i64 _2) { return __lsx_vaddwod_q_d(_1, _2); } ++// CHECK-LABEL: @vaddwev_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vaddwev_q_du(v2u64 _1, v2u64 _2) { return __lsx_vaddwev_q_du(_1, _2); } ++// CHECK-LABEL: @vaddwod_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vaddwod_q_du(v2u64 _1, v2u64 _2) { return __lsx_vaddwod_q_du(_1, _2); } ++// CHECK-LABEL: @vsubwev_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vsubwev_q_d(v2i64 _1, v2i64 _2) { return __lsx_vsubwev_q_d(_1, _2); } ++// CHECK-LABEL: @vsubwod_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vsubwod_q_d(v2i64 _1, v2i64 _2) { return __lsx_vsubwod_q_d(_1, _2); } ++// CHECK-LABEL: @vsubwev_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vsubwev_q_du(v2u64 _1, v2u64 _2) { return __lsx_vsubwev_q_du(_1, _2); } ++// CHECK-LABEL: @vsubwod_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vsubwod_q_du(v2u64 _1, v2u64 _2) { return __lsx_vsubwod_q_du(_1, _2); } ++// CHECK-LABEL: @vaddwev_q_du_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vaddwev_q_du_d(v2u64 _1, v2i64 _2) { ++ return __lsx_vaddwev_q_du_d(_1, _2); ++} ++// CHECK-LABEL: @vaddwod_q_du_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vaddwod_q_du_d(v2u64 _1, v2i64 _2) { ++ return __lsx_vaddwod_q_du_d(_1, _2); ++} ++// CHECK-LABEL: @vmulwev_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vmulwev_d_w(v4i32 _1, v4i32 _2) { return 
__lsx_vmulwev_d_w(_1, _2); } ++// CHECK-LABEL: @vmulwev_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vmulwev_w_h(v8i16 _1, v8i16 _2) { return __lsx_vmulwev_w_h(_1, _2); } ++// CHECK-LABEL: @vmulwev_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vmulwev_h_b(v16i8 _1, v16i8 _2) { return __lsx_vmulwev_h_b(_1, _2); } ++// CHECK-LABEL: @vmulwod_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vmulwod_d_w(v4i32 _1, v4i32 _2) { return __lsx_vmulwod_d_w(_1, _2); } ++// CHECK-LABEL: @vmulwod_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vmulwod_w_h(v8i16 _1, v8i16 _2) { return __lsx_vmulwod_w_h(_1, _2); } ++// CHECK-LABEL: @vmulwod_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vmulwod_h_b(v16i8 _1, v16i8 _2) { return __lsx_vmulwod_h_b(_1, _2); } ++// CHECK-LABEL: @vmulwev_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vmulwev_d_wu(v4u32 _1, v4u32 _2) { return __lsx_vmulwev_d_wu(_1, _2); } ++// CHECK-LABEL: @vmulwev_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vmulwev_w_hu(v8u16 _1, v8u16 _2) { return __lsx_vmulwev_w_hu(_1, _2); } ++// CHECK-LABEL: @vmulwev_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vmulwev_h_bu(v16u8 _1, v16u8 _2) { return __lsx_vmulwev_h_bu(_1, _2); } ++// CHECK-LABEL: @vmulwod_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vmulwod_d_wu(v4u32 _1, v4u32 _2) { return __lsx_vmulwod_d_wu(_1, _2); } ++// CHECK-LABEL: @vmulwod_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vmulwod_w_hu(v8u16 _1, v8u16 _2) { return __lsx_vmulwod_w_hu(_1, _2); } ++// CHECK-LABEL: @vmulwod_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vmulwod_h_bu(v16u8 _1, v16u8 _2) { return __lsx_vmulwod_h_bu(_1, _2); } ++// CHECK-LABEL: @vmulwev_d_wu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vmulwev_d_wu_w(v4u32 _1, v4i32 _2) { ++ return __lsx_vmulwev_d_wu_w(_1, 
_2); ++} ++// CHECK-LABEL: @vmulwev_w_hu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vmulwev_w_hu_h(v8u16 _1, v8i16 _2) { ++ return __lsx_vmulwev_w_hu_h(_1, _2); ++} ++// CHECK-LABEL: @vmulwev_h_bu_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vmulwev_h_bu_b(v16u8 _1, v16i8 _2) { ++ return __lsx_vmulwev_h_bu_b(_1, _2); ++} ++// CHECK-LABEL: @vmulwod_d_wu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vmulwod_d_wu_w(v4u32 _1, v4i32 _2) { ++ return __lsx_vmulwod_d_wu_w(_1, _2); ++} ++// CHECK-LABEL: @vmulwod_w_hu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vmulwod_w_hu_h(v8u16 _1, v8i16 _2) { ++ return __lsx_vmulwod_w_hu_h(_1, _2); ++} ++// CHECK-LABEL: @vmulwod_h_bu_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vmulwod_h_bu_b(v16u8 _1, v16i8 _2) { ++ return __lsx_vmulwod_h_bu_b(_1, _2); ++} ++// CHECK-LABEL: @vmulwev_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vmulwev_q_d(v2i64 _1, v2i64 _2) { return __lsx_vmulwev_q_d(_1, _2); } ++// CHECK-LABEL: @vmulwod_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vmulwod_q_d(v2i64 _1, v2i64 _2) { return __lsx_vmulwod_q_d(_1, _2); } ++// CHECK-LABEL: @vmulwev_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vmulwev_q_du(v2u64 _1, v2u64 _2) { return __lsx_vmulwev_q_du(_1, _2); } ++// CHECK-LABEL: @vmulwod_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vmulwod_q_du(v2u64 _1, v2u64 _2) { return __lsx_vmulwod_q_du(_1, _2); } ++// CHECK-LABEL: @vmulwev_q_du_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vmulwev_q_du_d(v2u64 _1, v2i64 _2) { ++ return __lsx_vmulwev_q_du_d(_1, _2); ++} ++// CHECK-LABEL: @vmulwod_q_du_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vmulwod_q_du_d(v2u64 _1, v2i64 _2) { ++ return __lsx_vmulwod_q_du_d(_1, _2); ++} ++// CHECK-LABEL: @vhaddw_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.q.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// 
++v2i64 vhaddw_q_d(v2i64 _1, v2i64 _2) { return __lsx_vhaddw_q_d(_1, _2); } ++// CHECK-LABEL: @vhaddw_qu_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.qu.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2u64 vhaddw_qu_du(v2u64 _1, v2u64 _2) { return __lsx_vhaddw_qu_du(_1, _2); } ++// CHECK-LABEL: @vhsubw_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.q.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vhsubw_q_d(v2i64 _1, v2i64 _2) { return __lsx_vhsubw_q_d(_1, _2); } ++// CHECK-LABEL: @vhsubw_qu_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.qu.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2u64 vhsubw_qu_du(v2u64 _1, v2u64 _2) { return __lsx_vhsubw_qu_du(_1, _2); } ++// CHECK-LABEL: @vmaddwev_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.w(<2 x i64> _1:%.*, <4 x i32> _2:%.*, <4 x i32> _3:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vmaddwev_d_w(v2i64 _1, v4i32 _2, v4i32 _3) { ++ return __lsx_vmaddwev_d_w(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwev_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.h(<4 x i32> _1:%.*, <8 x i16> _2:%.*, <8 x i16> _3:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vmaddwev_w_h(v4i32 _1, v8i16 _2, v8i16 _3) { ++ return __lsx_vmaddwev_w_h(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwev_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.b(<8 x i16> _1:%.*, <16 x i8> _2:%.*, <16 x i8> _3:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vmaddwev_h_b(v8i16 _1, v16i8 _2, v16i8 _3) { ++ return __lsx_vmaddwev_h_b(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwev_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu(<2 x i64> _1:%.*, <4 x i32> _2:%.*, <4 x i32> _3:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2u64 vmaddwev_d_wu(v2u64 _1, v4u32 _2, v4u32 _3) { ++ return __lsx_vmaddwev_d_wu(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwev_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu(<4 x i32> _1:%.*, <8 x i16> _2:%.*, <8 x i16> _3:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4u32 vmaddwev_w_hu(v4u32 _1, v8u16 _2, v8u16 _3) { ++ return __lsx_vmaddwev_w_hu(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwev_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu(<8 x i16> _1:%.*, <16 x i8> _2:%.*, <16 x i8> _3:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8u16 vmaddwev_h_bu(v8u16 _1, v16u8 _2, v16u8 _3) { ++ return __lsx_vmaddwev_h_bu(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwod_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.w(<2 x i64> _1:%.*, <4 x i32> _2:%.*, <4 x i32> _3:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vmaddwod_d_w(v2i64 _1, v4i32 _2, v4i32 _3) { ++ return __lsx_vmaddwod_d_w(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwod_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.h(<4 x i32> _1:%.*, <8 x i16> _2:%.*, <8 x i16> _3:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vmaddwod_w_h(v4i32 _1, 
v8i16 _2, v8i16 _3) { ++ return __lsx_vmaddwod_w_h(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwod_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.b(<8 x i16> _1:%.*, <16 x i8> _2:%.*, <16 x i8> _3:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vmaddwod_h_b(v8i16 _1, v16i8 _2, v16i8 _3) { ++ return __lsx_vmaddwod_h_b(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwod_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu(<2 x i64> _1:%.*, <4 x i32> _2:%.*, <4 x i32> _3:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2u64 vmaddwod_d_wu(v2u64 _1, v4u32 _2, v4u32 _3) { ++ return __lsx_vmaddwod_d_wu(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwod_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu(<4 x i32> _1:%.*, <8 x i16> _2:%.*, <8 x i16> _3:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4u32 vmaddwod_w_hu(v4u32 _1, v8u16 _2, v8u16 _3) { ++ return __lsx_vmaddwod_w_hu(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwod_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu(<8 x i16> _1:%.*, <16 x i8> _2:%.*, <16 x i8> _3:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8u16 vmaddwod_h_bu(v8u16 _1, v16u8 _2, v16u8 _3) { ++ return __lsx_vmaddwod_h_bu(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwev_d_wu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu.w(<2 x i64> _1:%.*, <4 x i32> _2:%.*, <4 x i32> _3:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vmaddwev_d_wu_w(v2i64 _1, v4u32 _2, v4i32 _3) { ++ return __lsx_vmaddwev_d_wu_w(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwev_w_hu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu.h(<4 x i32> _1:%.*, <8 x i16> _2:%.*, <8 x i16> _3:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vmaddwev_w_hu_h(v4i32 _1, v8u16 _2, v8i16 _3) { ++ return __lsx_vmaddwev_w_hu_h(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwev_h_bu_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu.b(<8 x i16> _1:%.*, <16 x i8> _2:%.*, <16 x i8> _3:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vmaddwev_h_bu_b(v8i16 _1, v16u8 _2, v16i8 _3) { ++ return __lsx_vmaddwev_h_bu_b(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwod_d_wu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu.w(<2 x i64> _1:%.*, <4 x i32> _2:%.*, <4 x i32> _3:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vmaddwod_d_wu_w(v2i64 _1, v4u32 _2, v4i32 _3) { ++ return __lsx_vmaddwod_d_wu_w(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwod_w_hu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu.h(<4 x i32> _1:%.*, <8 x i16> _2:%.*, <8 x i16> _3:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vmaddwod_w_hu_h(v4i32 _1, v8u16 _2, v8i16 _3) { ++ return __lsx_vmaddwod_w_hu_h(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwod_h_bu_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu.b(<8 x i16> _1:%.*, <16 x i8> _2:%.*, <16 x i8> _3:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vmaddwod_h_bu_b(v8i16 _1, v16u8 _2, v16i8 _3) { ++ return __lsx_vmaddwod_h_bu_b(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwev_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* 
= tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*, <2 x i64> _3:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vmaddwev_q_d(v2i64 _1, v2i64 _2, v2i64 _3) { ++ return __lsx_vmaddwev_q_d(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwod_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*, <2 x i64> _3:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vmaddwod_q_d(v2i64 _1, v2i64 _2, v2i64 _3) { ++ return __lsx_vmaddwod_q_d(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwev_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*, <2 x i64> _3:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2u64 vmaddwev_q_du(v2u64 _1, v2u64 _2, v2u64 _3) { ++ return __lsx_vmaddwev_q_du(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwod_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*, <2 x i64> _3:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2u64 vmaddwod_q_du(v2u64 _1, v2u64 _2, v2u64 _3) { ++ return __lsx_vmaddwod_q_du(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwev_q_du_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*, <2 x i64> _3:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vmaddwev_q_du_d(v2i64 _1, v2u64 _2, v2i64 _3) { ++ return __lsx_vmaddwev_q_du_d(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwod_q_du_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*, <2 x i64> _3:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vmaddwod_q_du_d(v2i64 _1, v2u64 _2, v2i64 _3) { ++ return __lsx_vmaddwod_q_du_d(_1, _2, _3); ++} ++// CHECK-LABEL: @vrotr_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vrotr.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vrotr_b(v16i8 _1, v16i8 _2) { return __lsx_vrotr_b(_1, _2); } ++// CHECK-LABEL: @vrotr_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vrotr.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vrotr_h(v8i16 _1, v8i16 _2) { return __lsx_vrotr_h(_1, _2); } ++// CHECK-LABEL: @vrotr_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vrotr.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vrotr_w(v4i32 _1, v4i32 _2) { return __lsx_vrotr_w(_1, _2); } ++// CHECK-LABEL: @vrotr_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vrotr.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vrotr_d(v2i64 _1, v2i64 _2) { return __lsx_vrotr_d(_1, _2); } ++// CHECK-LABEL: @vadd_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vadd.q(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vadd_q(v2i64 _1, v2i64 _2) { return __lsx_vadd_q(_1, _2); } ++// CHECK-LABEL: @vsub_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsub.q(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vsub_q(v2i64 _1, v2i64 _2) { 
return __lsx_vsub_q(_1, _2); } ++// CHECK-LABEL: @vldrepl_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vldrepl.b(ptr _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vldrepl_b(void *_1) { return __lsx_vldrepl_b(_1, 1); } ++// CHECK-LABEL: @vldrepl_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vldrepl.h(ptr _1:%.*, i32 2) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vldrepl_h(void *_1) { return __lsx_vldrepl_h(_1, 2); } ++// CHECK-LABEL: @vldrepl_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vldrepl.w(ptr _1:%.*, i32 4) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vldrepl_w(void *_1) { return __lsx_vldrepl_w(_1, 4); } ++// CHECK-LABEL: @vldrepl_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vldrepl.d(ptr _1:%.*, i32 8) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vldrepl_d(void *_1) { return __lsx_vldrepl_d(_1, 8); } ++// CHECK-LABEL: @vmskgez_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmskgez.b(<16 x i8> _1:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vmskgez_b(v16i8 _1) { return __lsx_vmskgez_b(_1); } ++// CHECK-LABEL: @vmsknz_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmsknz.b(<16 x i8> _1:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vmsknz_b(v16i8 _1) { return __lsx_vmsknz_b(_1); } ++// CHECK-LABEL: @vexth_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vexth.h.b(<16 x i8> _1:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vexth_h_b(v16i8 _1) { return __lsx_vexth_h_b(_1); } ++// CHECK-LABEL: @vexth_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vexth.w.h(<8 x i16> _1:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vexth_w_h(v8i16 _1) { return __lsx_vexth_w_h(_1); } ++// CHECK-LABEL: @vexth_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vexth.d.w(<4 x i32> _1:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vexth_d_w(v4i32 _1) { return __lsx_vexth_d_w(_1); } ++// CHECK-LABEL: @vexth_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vexth.q.d(<2 x i64> _1:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vexth_q_d(v2i64 _1) { return __lsx_vexth_q_d(_1); } ++// CHECK-LABEL: @vexth_hu_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vexth.hu.bu(<16 x i8> _1:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8u16 vexth_hu_bu(v16u8 _1) { return __lsx_vexth_hu_bu(_1); } ++// CHECK-LABEL: @vexth_wu_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vexth.wu.hu(<8 x i16> _1:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4u32 vexth_wu_hu(v8u16 _1) { return __lsx_vexth_wu_hu(_1); } ++// CHECK-LABEL: @vexth_du_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vexth.du.wu(<4 x i32> _1:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2u64 vexth_du_wu(v4u32 _1) { return __lsx_vexth_du_wu(_1); } ++// CHECK-LABEL: @vexth_qu_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vexth.qu.du(<2 x i64> _1:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2u64 vexth_qu_du(v2u64 _1) 
{ return __lsx_vexth_qu_du(_1); } ++// CHECK-LABEL: @vrotri_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vrotri.b(<16 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vrotri_b(v16i8 _1) { return __lsx_vrotri_b(_1, 1); } ++// CHECK-LABEL: @vrotri_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vrotri.h(<8 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vrotri_h(v8i16 _1) { return __lsx_vrotri_h(_1, 1); } ++// CHECK-LABEL: @vrotri_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vrotri.w(<4 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vrotri_w(v4i32 _1) { return __lsx_vrotri_w(_1, 1); } ++// CHECK-LABEL: @vrotri_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vrotri.d(<2 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vrotri_d(v2i64 _1) { return __lsx_vrotri_d(_1, 1); } ++// CHECK-LABEL: @vextl_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vextl.q.d(<2 x i64> _1:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vextl_q_d(v2i64 _1) { return __lsx_vextl_q_d(_1); } ++// CHECK-LABEL: @vsrlni_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h(<16 x i8> _1:%.*, <16 x i8> _2:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vsrlni_b_h(v16i8 _1, v16i8 _2) { return __lsx_vsrlni_b_h(_1, _2, 1); } ++// CHECK-LABEL: @vsrlni_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsrlni.h.w(<8 x i16> _1:%.*, <8 x i16> _2:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vsrlni_h_w(v8i16 _1, v8i16 _2) { return __lsx_vsrlni_h_w(_1, _2, 1); } ++// CHECK-LABEL: @vsrlni_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsrlni.w.d(<4 x i32> _1:%.*, <4 x i32> _2:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vsrlni_w_d(v4i32 _1, v4i32 _2) { return __lsx_vsrlni_w_d(_1, _2, 1); } ++// CHECK-LABEL: @vsrlni_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsrlni.d.q(<2 x i64> _1:%.*, <2 x i64> _2:%.*, i32 1) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vsrlni_d_q(v2i64 _1, v2i64 _2) { return __lsx_vsrlni_d_q(_1, _2, 1); } ++// CHECK-LABEL: @vsrlrni_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h(<16 x i8> _1:%.*, <16 x i8> _2:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vsrlrni_b_h(v16i8 _1, v16i8 _2) { return __lsx_vsrlrni_b_h(_1, _2, 1); } ++// CHECK-LABEL: @vsrlrni_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsrlrni.h.w(<8 x i16> _1:%.*, <8 x i16> _2:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vsrlrni_h_w(v8i16 _1, v8i16 _2) { return __lsx_vsrlrni_h_w(_1, _2, 1); } ++// CHECK-LABEL: @vsrlrni_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d(<4 x i32> _1:%.*, <4 x i32> _2:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vsrlrni_w_d(v4i32 _1, v4i32 _2) { return __lsx_vsrlrni_w_d(_1, _2, 1); } ++// CHECK-LABEL: @vsrlrni_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsrlrni.d.q(<2 x i64> 
_1:%.*, <2 x i64> _2:%.*, i32 1) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vsrlrni_d_q(v2i64 _1, v2i64 _2) { return __lsx_vsrlrni_d_q(_1, _2, 1); } ++// CHECK-LABEL: @vssrlni_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h(<16 x i8> _1:%.*, <16 x i8> _2:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vssrlni_b_h(v16i8 _1, v16i8 _2) { return __lsx_vssrlni_b_h(_1, _2, 1); } ++// CHECK-LABEL: @vssrlni_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssrlni.h.w(<8 x i16> _1:%.*, <8 x i16> _2:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vssrlni_h_w(v8i16 _1, v8i16 _2) { return __lsx_vssrlni_h_w(_1, _2, 1); } ++// CHECK-LABEL: @vssrlni_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssrlni.w.d(<4 x i32> _1:%.*, <4 x i32> _2:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vssrlni_w_d(v4i32 _1, v4i32 _2) { return __lsx_vssrlni_w_d(_1, _2, 1); } ++// CHECK-LABEL: @vssrlni_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vssrlni.d.q(<2 x i64> _1:%.*, <2 x i64> _2:%.*, i32 1) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vssrlni_d_q(v2i64 _1, v2i64 _2) { return __lsx_vssrlni_d_q(_1, _2, 1); } ++// CHECK-LABEL: @vssrlni_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssrlni.bu.h(<16 x i8> _1:%.*, <16 x i8> _2:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16u8 vssrlni_bu_h(v16u8 _1, v16i8 _2) { return __lsx_vssrlni_bu_h(_1, _2, 1); } ++// CHECK-LABEL: @vssrlni_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssrlni.hu.w(<8 x i16> _1:%.*, <8 x i16> _2:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8u16 vssrlni_hu_w(v8u16 _1, v8i16 _2) { return __lsx_vssrlni_hu_w(_1, _2, 1); } ++// CHECK-LABEL: @vssrlni_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssrlni.wu.d(<4 x i32> _1:%.*, <4 x i32> _2:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4u32 vssrlni_wu_d(v4u32 _1, v4i32 _2) { return __lsx_vssrlni_wu_d(_1, _2, 1); } ++// CHECK-LABEL: @vssrlni_du_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vssrlni.du.q(<2 x i64> _1:%.*, <2 x i64> _2:%.*, i32 1) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2u64 vssrlni_du_q(v2u64 _1, v2i64 _2) { return __lsx_vssrlni_du_q(_1, _2, 1); } ++// CHECK-LABEL: @vssrlrni_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h(<16 x i8> _1:%.*, <16 x i8> _2:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vssrlrni_b_h(v16i8 _1, v16i8 _2) { return __lsx_vssrlrni_b_h(_1, _2, 1); } ++// CHECK-LABEL: @vssrlrni_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrni.h.w(<8 x i16> _1:%.*, <8 x i16> _2:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vssrlrni_h_w(v8i16 _1, v8i16 _2) { return __lsx_vssrlrni_h_w(_1, _2, 1); } ++// CHECK-LABEL: @vssrlrni_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrni.w.d(<4 x i32> _1:%.*, <4 x i32> _2:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vssrlrni_w_d(v4i32 _1, v4i32 _2) { return __lsx_vssrlrni_w_d(_1, _2, 1); } ++// CHECK-LABEL: @vssrlrni_d_q( ++// CHECK-NEXT: entry: 
++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vssrlrni.d.q(<2 x i64> _1:%.*, <2 x i64> _2:%.*, i32 1) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vssrlrni_d_q(v2i64 _1, v2i64 _2) { return __lsx_vssrlrni_d_q(_1, _2, 1); } ++// CHECK-LABEL: @vssrlrni_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrni.bu.h(<16 x i8> _1:%.*, <16 x i8> _2:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16u8 vssrlrni_bu_h(v16u8 _1, v16i8 _2) { ++ return __lsx_vssrlrni_bu_h(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrlrni_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrni.hu.w(<8 x i16> _1:%.*, <8 x i16> _2:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8u16 vssrlrni_hu_w(v8u16 _1, v8i16 _2) { ++ return __lsx_vssrlrni_hu_w(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrlrni_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrni.wu.d(<4 x i32> _1:%.*, <4 x i32> _2:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4u32 vssrlrni_wu_d(v4u32 _1, v4i32 _2) { ++ return __lsx_vssrlrni_wu_d(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrlrni_du_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vssrlrni.du.q(<2 x i64> _1:%.*, <2 x i64> _2:%.*, i32 1) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2u64 vssrlrni_du_q(v2u64 _1, v2i64 _2) { ++ return __lsx_vssrlrni_du_q(_1, _2, 1); ++} ++// CHECK-LABEL: @vsrani_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsrani.b.h(<16 x i8> _1:%.*, <16 x i8> _2:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vsrani_b_h(v16i8 _1, v16i8 _2) { return __lsx_vsrani_b_h(_1, _2, 1); } ++// CHECK-LABEL: @vsrani_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsrani.h.w(<8 x i16> _1:%.*, <8 x i16> _2:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vsrani_h_w(v8i16 _1, v8i16 _2) { return __lsx_vsrani_h_w(_1, _2, 1); } ++// CHECK-LABEL: @vsrani_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsrani.w.d(<4 x i32> _1:%.*, <4 x i32> _2:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vsrani_w_d(v4i32 _1, v4i32 _2) { return __lsx_vsrani_w_d(_1, _2, 1); } ++// CHECK-LABEL: @vsrani_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsrani.d.q(<2 x i64> _1:%.*, <2 x i64> _2:%.*, i32 1) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vsrani_d_q(v2i64 _1, v2i64 _2) { return __lsx_vsrani_d_q(_1, _2, 1); } ++// CHECK-LABEL: @vsrarni_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h(<16 x i8> _1:%.*, <16 x i8> _2:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vsrarni_b_h(v16i8 _1, v16i8 _2) { return __lsx_vsrarni_b_h(_1, _2, 1); } ++// CHECK-LABEL: @vsrarni_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsrarni.h.w(<8 x i16> _1:%.*, <8 x i16> _2:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vsrarni_h_w(v8i16 _1, v8i16 _2) { return __lsx_vsrarni_h_w(_1, _2, 1); } ++// CHECK-LABEL: @vsrarni_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsrarni.w.d(<4 x i32> _1:%.*, <4 x i32> _2:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vsrarni_w_d(v4i32 _1, v4i32 _2) 
{ return __lsx_vsrarni_w_d(_1, _2, 1); } ++// CHECK-LABEL: @vsrarni_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsrarni.d.q(<2 x i64> _1:%.*, <2 x i64> _2:%.*, i32 1) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vsrarni_d_q(v2i64 _1, v2i64 _2) { return __lsx_vsrarni_d_q(_1, _2, 1); } ++// CHECK-LABEL: @vssrani_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssrani.b.h(<16 x i8> _1:%.*, <16 x i8> _2:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vssrani_b_h(v16i8 _1, v16i8 _2) { return __lsx_vssrani_b_h(_1, _2, 1); } ++// CHECK-LABEL: @vssrani_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssrani.h.w(<8 x i16> _1:%.*, <8 x i16> _2:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vssrani_h_w(v8i16 _1, v8i16 _2) { return __lsx_vssrani_h_w(_1, _2, 1); } ++// CHECK-LABEL: @vssrani_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssrani.w.d(<4 x i32> _1:%.*, <4 x i32> _2:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vssrani_w_d(v4i32 _1, v4i32 _2) { return __lsx_vssrani_w_d(_1, _2, 1); } ++// CHECK-LABEL: @vssrani_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vssrani.d.q(<2 x i64> _1:%.*, <2 x i64> _2:%.*, i32 1) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vssrani_d_q(v2i64 _1, v2i64 _2) { return __lsx_vssrani_d_q(_1, _2, 1); } ++// CHECK-LABEL: @vssrani_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssrani.bu.h(<16 x i8> _1:%.*, <16 x i8> _2:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16u8 vssrani_bu_h(v16u8 _1, v16i8 _2) { return __lsx_vssrani_bu_h(_1, _2, 1); } ++// CHECK-LABEL: @vssrani_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssrani.hu.w(<8 x i16> _1:%.*, <8 x i16> _2:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8u16 vssrani_hu_w(v8u16 _1, v8i16 _2) { return __lsx_vssrani_hu_w(_1, _2, 1); } ++// CHECK-LABEL: @vssrani_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssrani.wu.d(<4 x i32> _1:%.*, <4 x i32> _2:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4u32 vssrani_wu_d(v4u32 _1, v4i32 _2) { return __lsx_vssrani_wu_d(_1, _2, 1); } ++// CHECK-LABEL: @vssrani_du_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vssrani.du.q(<2 x i64> _1:%.*, <2 x i64> _2:%.*, i32 1) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2u64 vssrani_du_q(v2u64 _1, v2i64 _2) { return __lsx_vssrani_du_q(_1, _2, 1); } ++// CHECK-LABEL: @vssrarni_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h(<16 x i8> _1:%.*, <16 x i8> _2:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vssrarni_b_h(v16i8 _1, v16i8 _2) { return __lsx_vssrarni_b_h(_1, _2, 1); } ++// CHECK-LABEL: @vssrarni_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssrarni.h.w(<8 x i16> _1:%.*, <8 x i16> _2:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vssrarni_h_w(v8i16 _1, v8i16 _2) { return __lsx_vssrarni_h_w(_1, _2, 1); } ++// CHECK-LABEL: @vssrarni_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssrarni.w.d(<4 x i32> _1:%.*, <4 x i32> _2:%.*, i32 
1) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vssrarni_w_d(v4i32 _1, v4i32 _2) { return __lsx_vssrarni_w_d(_1, _2, 1); } ++// CHECK-LABEL: @vssrarni_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vssrarni.d.q(<2 x i64> _1:%.*, <2 x i64> _2:%.*, i32 1) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vssrarni_d_q(v2i64 _1, v2i64 _2) { return __lsx_vssrarni_d_q(_1, _2, 1); } ++// CHECK-LABEL: @vssrarni_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssrarni.bu.h(<16 x i8> _1:%.*, <16 x i8> _2:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16u8 vssrarni_bu_h(v16u8 _1, v16i8 _2) { ++ return __lsx_vssrarni_bu_h(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrarni_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssrarni.hu.w(<8 x i16> _1:%.*, <8 x i16> _2:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8u16 vssrarni_hu_w(v8u16 _1, v8i16 _2) { ++ return __lsx_vssrarni_hu_w(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrarni_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssrarni.wu.d(<4 x i32> _1:%.*, <4 x i32> _2:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4u32 vssrarni_wu_d(v4u32 _1, v4i32 _2) { ++ return __lsx_vssrarni_wu_d(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrarni_du_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vssrarni.du.q(<2 x i64> _1:%.*, <2 x i64> _2:%.*, i32 1) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2u64 vssrarni_du_q(v2u64 _1, v2i64 _2) { ++ return __lsx_vssrarni_du_q(_1, _2, 1); ++} ++// CHECK-LABEL: @vpermi_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vpermi.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vpermi_w(v4i32 _1, v4i32 _2) { return __lsx_vpermi_w(_1, _2, 1); } ++// CHECK-LABEL: @vld( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vld(ptr _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vld(void *_1) { return __lsx_vld(_1, 1); } ++// CHECK-LABEL: @vst( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vst(<16 x i8> _1:%.*, ptr _2:%.*, i32 1) ++// CHECK-NEXT: ret void ++// ++void vst(v16i8 _1, void *_2) { return __lsx_vst(_1, _2, 1); } ++// CHECK-LABEL: @vssrlrn_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrn.b.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vssrlrn_b_h(v8i16 _1, v8i16 _2) { return __lsx_vssrlrn_b_h(_1, _2); } ++// CHECK-LABEL: @vssrlrn_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrn.h.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vssrlrn_h_w(v4i32 _1, v4i32 _2) { return __lsx_vssrlrn_h_w(_1, _2); } ++// CHECK-LABEL: @vssrlrn_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrn.w.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vssrlrn_w_d(v2i64 _1, v2i64 _2) { return __lsx_vssrlrn_w_d(_1, _2); } ++// CHECK-LABEL: @vssrln_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssrln.b.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vssrln_b_h(v8i16 
_1, v8i16 _2) { return __lsx_vssrln_b_h(_1, _2); } ++// CHECK-LABEL: @vssrln_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssrln.h.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vssrln_h_w(v4i32 _1, v4i32 _2) { return __lsx_vssrln_h_w(_1, _2); } ++// CHECK-LABEL: @vssrln_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssrln.w.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vssrln_w_d(v2i64 _1, v2i64 _2) { return __lsx_vssrln_w_d(_1, _2); } ++// CHECK-LABEL: @vorn_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vorn.v(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vorn_v(v16i8 _1, v16i8 _2) { return __lsx_vorn_v(_1, _2); } ++// CHECK-LABEL: @vldi( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vldi(i32 1) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vldi() { return __lsx_vldi(1); } ++// CHECK-LABEL: @vshuf_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vshuf.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*, <16 x i8> _3:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vshuf_b(v16i8 _1, v16i8 _2, v16i8 _3) { ++ return __lsx_vshuf_b(_1, _2, _3); ++} ++// CHECK-LABEL: @vldx( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vldx(ptr _1:%.*, i64 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vldx(void *_1) { return __lsx_vldx(_1, 1); } ++// CHECK-LABEL: @vstx( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstx(<16 x i8> _1:%.*, ptr _2:%.*, i64 1) ++// CHECK-NEXT: ret void ++// ++void vstx(v16i8 _1, void *_2) { return __lsx_vstx(_1, _2, 1); } ++// CHECK-LABEL: @vextl_qu_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vextl.qu.du(<2 x i64> _1:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2u64 vextl_qu_du(v2u64 _1) { return __lsx_vextl_qu_du(_1); } ++// CHECK-LABEL: @bnz_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lsx.bnz.b(<16 x i8> _1:%.*) ++// CHECK-NEXT: ret i32 TMP0 ++// ++int bnz_b(v16u8 _1) { return __lsx_bnz_b(_1); } ++// CHECK-LABEL: @bnz_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lsx.bnz.d(<2 x i64> _1:%.*) ++// CHECK-NEXT: ret i32 TMP0 ++// ++int bnz_d(v2u64 _1) { return __lsx_bnz_d(_1); } ++// CHECK-LABEL: @bnz_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lsx.bnz.h(<8 x i16> _1:%.*) ++// CHECK-NEXT: ret i32 TMP0 ++// ++int bnz_h(v8u16 _1) { return __lsx_bnz_h(_1); } ++// CHECK-LABEL: @bnz_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lsx.bnz.v(<16 x i8> _1:%.*) ++// CHECK-NEXT: ret i32 TMP0 ++// ++int bnz_v(v16u8 _1) { return __lsx_bnz_v(_1); } ++// CHECK-LABEL: @bnz_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lsx.bnz.w(<4 x i32> _1:%.*) ++// CHECK-NEXT: ret i32 TMP0 ++// ++int bnz_w(v4u32 _1) { return __lsx_bnz_w(_1); } ++// CHECK-LABEL: @bz_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lsx.bz.b(<16 x i8> _1:%.*) ++// CHECK-NEXT: ret i32 TMP0 ++// ++int bz_b(v16u8 _1) { return __lsx_bz_b(_1); } ++// CHECK-LABEL: @bz_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = 
tail call i32 @llvm.loongarch.lsx.bz.d(<2 x i64> _1:%.*) ++// CHECK-NEXT: ret i32 TMP0 ++// ++int bz_d(v2u64 _1) { return __lsx_bz_d(_1); } ++// CHECK-LABEL: @bz_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lsx.bz.h(<8 x i16> _1:%.*) ++// CHECK-NEXT: ret i32 TMP0 ++// ++int bz_h(v8u16 _1) { return __lsx_bz_h(_1); } ++// CHECK-LABEL: @bz_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lsx.bz.v(<16 x i8> _1:%.*) ++// CHECK-NEXT: ret i32 TMP0 ++// ++int bz_v(v16u8 _1) { return __lsx_bz_v(_1); } ++// CHECK-LABEL: @bz_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lsx.bz.w(<4 x i32> _1:%.*) ++// CHECK-NEXT: ret i32 TMP0 ++// ++int bz_w(v4u32 _1) { return __lsx_bz_w(_1); } ++// CHECK-LABEL: @vfcmp_caf_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.caf.d(<2 x double> _1:%.*, <2 x double> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vfcmp_caf_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_caf_d(_1, _2); } ++// CHECK-LABEL: @vfcmp_caf_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.caf.s(<4 x float> _1:%.*, <4 x float> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vfcmp_caf_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_caf_s(_1, _2); } ++// CHECK-LABEL: @vfcmp_ceq_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.ceq.d(<2 x double> _1:%.*, <2 x double> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vfcmp_ceq_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_ceq_d(_1, _2); } ++// CHECK-LABEL: @vfcmp_ceq_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.ceq.s(<4 x float> _1:%.*, <4 x float> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vfcmp_ceq_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_ceq_s(_1, _2); } ++// CHECK-LABEL: @vfcmp_cle_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cle.d(<2 x double> _1:%.*, <2 x double> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vfcmp_cle_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cle_d(_1, _2); } ++// CHECK-LABEL: @vfcmp_cle_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cle.s(<4 x float> _1:%.*, <4 x float> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vfcmp_cle_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cle_s(_1, _2); } ++// CHECK-LABEL: @vfcmp_clt_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.clt.d(<2 x double> _1:%.*, <2 x double> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vfcmp_clt_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_clt_d(_1, _2); } ++// CHECK-LABEL: @vfcmp_clt_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.clt.s(<4 x float> _1:%.*, <4 x float> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vfcmp_clt_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_clt_s(_1, _2); } ++// CHECK-LABEL: @vfcmp_cne_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cne.d(<2 x double> _1:%.*, <2 x double> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vfcmp_cne_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cne_d(_1, _2); } ++// CHECK-LABEL: @vfcmp_cne_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x 
i32> @llvm.loongarch.lsx.vfcmp.cne.s(<4 x float> _1:%.*, <4 x float> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vfcmp_cne_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cne_s(_1, _2); } ++// CHECK-LABEL: @vfcmp_cor_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cor.d(<2 x double> _1:%.*, <2 x double> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vfcmp_cor_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cor_d(_1, _2); } ++// CHECK-LABEL: @vfcmp_cor_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cor.s(<4 x float> _1:%.*, <4 x float> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vfcmp_cor_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cor_s(_1, _2); } ++// CHECK-LABEL: @vfcmp_cueq_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cueq.d(<2 x double> _1:%.*, <2 x double> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vfcmp_cueq_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cueq_d(_1, _2); } ++// CHECK-LABEL: @vfcmp_cueq_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cueq.s(<4 x float> _1:%.*, <4 x float> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vfcmp_cueq_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cueq_s(_1, _2); } ++// CHECK-LABEL: @vfcmp_cule_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cule.d(<2 x double> _1:%.*, <2 x double> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vfcmp_cule_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cule_d(_1, _2); } ++// CHECK-LABEL: @vfcmp_cule_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cule.s(<4 x float> _1:%.*, <4 x float> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vfcmp_cule_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cule_s(_1, _2); } ++// CHECK-LABEL: @vfcmp_cult_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cult.d(<2 x double> _1:%.*, <2 x double> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vfcmp_cult_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cult_d(_1, _2); } ++// CHECK-LABEL: @vfcmp_cult_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cult.s(<4 x float> _1:%.*, <4 x float> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vfcmp_cult_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cult_s(_1, _2); } ++// CHECK-LABEL: @vfcmp_cun_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cun.d(<2 x double> _1:%.*, <2 x double> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vfcmp_cun_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cun_d(_1, _2); } ++// CHECK-LABEL: @vfcmp_cune_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cune.d(<2 x double> _1:%.*, <2 x double> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vfcmp_cune_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cune_d(_1, _2); } ++// CHECK-LABEL: @vfcmp_cune_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cune.s(<4 x float> _1:%.*, <4 x float> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vfcmp_cune_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cune_s(_1, _2); } ++// CHECK-LABEL: @vfcmp_cun_s( ++// CHECK-NEXT: entry: ++// 
CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cun.s(<4 x float> _1:%.*, <4 x float> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vfcmp_cun_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cun_s(_1, _2); } ++// CHECK-LABEL: @vfcmp_saf_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.saf.d(<2 x double> _1:%.*, <2 x double> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vfcmp_saf_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_saf_d(_1, _2); } ++// CHECK-LABEL: @vfcmp_saf_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.saf.s(<4 x float> _1:%.*, <4 x float> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vfcmp_saf_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_saf_s(_1, _2); } ++// CHECK-LABEL: @vfcmp_seq_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.seq.d(<2 x double> _1:%.*, <2 x double> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vfcmp_seq_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_seq_d(_1, _2); } ++// CHECK-LABEL: @vfcmp_seq_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.seq.s(<4 x float> _1:%.*, <4 x float> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vfcmp_seq_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_seq_s(_1, _2); } ++// CHECK-LABEL: @vfcmp_sle_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sle.d(<2 x double> _1:%.*, <2 x double> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vfcmp_sle_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sle_d(_1, _2); } ++// CHECK-LABEL: @vfcmp_sle_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sle.s(<4 x float> _1:%.*, <4 x float> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vfcmp_sle_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sle_s(_1, _2); } ++// CHECK-LABEL: @vfcmp_slt_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.slt.d(<2 x double> _1:%.*, <2 x double> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vfcmp_slt_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_slt_d(_1, _2); } ++// CHECK-LABEL: @vfcmp_slt_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.slt.s(<4 x float> _1:%.*, <4 x float> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vfcmp_slt_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_slt_s(_1, _2); } ++// CHECK-LABEL: @vfcmp_sne_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sne.d(<2 x double> _1:%.*, <2 x double> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vfcmp_sne_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sne_d(_1, _2); } ++// CHECK-LABEL: @vfcmp_sne_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sne.s(<4 x float> _1:%.*, <4 x float> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vfcmp_sne_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sne_s(_1, _2); } ++// CHECK-LABEL: @vfcmp_sor_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sor.d(<2 x double> _1:%.*, <2 x double> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vfcmp_sor_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sor_d(_1, _2); } ++// CHECK-LABEL: @vfcmp_sor_s( ++// CHECK-NEXT: entry: ++// 
CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sor.s(<4 x float> _1:%.*, <4 x float> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vfcmp_sor_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sor_s(_1, _2); } ++// CHECK-LABEL: @vfcmp_sueq_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sueq.d(<2 x double> _1:%.*, <2 x double> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vfcmp_sueq_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sueq_d(_1, _2); } ++// CHECK-LABEL: @vfcmp_sueq_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sueq.s(<4 x float> _1:%.*, <4 x float> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vfcmp_sueq_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sueq_s(_1, _2); } ++// CHECK-LABEL: @vfcmp_sule_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sule.d(<2 x double> _1:%.*, <2 x double> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vfcmp_sule_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sule_d(_1, _2); } ++// CHECK-LABEL: @vfcmp_sule_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sule.s(<4 x float> _1:%.*, <4 x float> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vfcmp_sule_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sule_s(_1, _2); } ++// CHECK-LABEL: @vfcmp_sult_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sult.d(<2 x double> _1:%.*, <2 x double> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vfcmp_sult_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sult_d(_1, _2); } ++// CHECK-LABEL: @vfcmp_sult_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sult.s(<4 x float> _1:%.*, <4 x float> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vfcmp_sult_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sult_s(_1, _2); } ++// CHECK-LABEL: @vfcmp_sun_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sun.d(<2 x double> _1:%.*, <2 x double> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vfcmp_sun_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sun_d(_1, _2); } ++// CHECK-LABEL: @vfcmp_sune_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sune.d(<2 x double> _1:%.*, <2 x double> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vfcmp_sune_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sune_d(_1, _2); } ++// CHECK-LABEL: @vfcmp_sune_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sune.s(<4 x float> _1:%.*, <4 x float> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vfcmp_sune_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sune_s(_1, _2); } ++// CHECK-LABEL: @vfcmp_sun_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sun.s(<4 x float> _1:%.*, <4 x float> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vfcmp_sun_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sun_s(_1, _2); } ++// CHECK-LABEL: @vrepli_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vrepli.b(i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vrepli_b() { return __lsx_vrepli_b(1); } ++// CHECK-LABEL: @vrepli_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> 
@llvm.loongarch.lsx.vrepli.d(i32 1) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vrepli_d() { return __lsx_vrepli_d(1); } ++// CHECK-LABEL: @vrepli_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vrepli.h(i32 1) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vrepli_h() { return __lsx_vrepli_h(1); } ++// CHECK-LABEL: @vrepli_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vrepli.w(i32 1) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vrepli_w() { return __lsx_vrepli_w(1); } +diff --git a/clang/test/CodeGen/LoongArch/lsx/builtin-error.c b/clang/test/CodeGen/LoongArch/lsx/builtin-error.c +new file mode 100644 +index 000000000000..3fc5f73f1193 +--- /dev/null ++++ b/clang/test/CodeGen/LoongArch/lsx/builtin-error.c +@@ -0,0 +1,1382 @@ ++// RUN: %clang_cc1 -triple loongarch64 -target-feature +lsx -verify %s ++ ++typedef signed char v16i8 __attribute__((vector_size(16), aligned(16))); ++typedef signed char v16i8_b __attribute__((vector_size(16), aligned(1))); ++typedef unsigned char v16u8 __attribute__((vector_size(16), aligned(16))); ++typedef unsigned char v16u8_b __attribute__((vector_size(16), aligned(1))); ++typedef short v8i16 __attribute__((vector_size(16), aligned(16))); ++typedef short v8i16_h __attribute__((vector_size(16), aligned(2))); ++typedef unsigned short v8u16 __attribute__((vector_size(16), aligned(16))); ++typedef unsigned short v8u16_h __attribute__((vector_size(16), aligned(2))); ++typedef int v4i32 __attribute__((vector_size(16), aligned(16))); ++typedef int v4i32_w __attribute__((vector_size(16), aligned(4))); ++typedef unsigned int v4u32 __attribute__((vector_size(16), aligned(16))); ++typedef unsigned int v4u32_w __attribute__((vector_size(16), aligned(4))); ++typedef long long v2i64 __attribute__((vector_size(16), aligned(16))); ++typedef long long v2i64_d __attribute__((vector_size(16), aligned(8))); ++typedef unsigned long long v2u64 __attribute__((vector_size(16), aligned(16))); ++typedef unsigned long long v2u64_d __attribute__((vector_size(16), aligned(8))); ++typedef float v4f32 __attribute__((vector_size(16), aligned(16))); ++typedef float v4f32_w __attribute__((vector_size(16), aligned(4))); ++typedef double v2f64 __attribute__((vector_size(16), aligned(16))); ++typedef double v2f64_d __attribute__((vector_size(16), aligned(8))); ++ ++typedef long long __m128i __attribute__((__vector_size__(16), __may_alias__)); ++typedef float __m128 __attribute__((__vector_size__(16), __may_alias__)); ++typedef double __m128d __attribute__((__vector_size__(16), __may_alias__)); ++ ++v16i8 vslli_b(v16i8 _1, int var) { ++ v16i8 res = __builtin_lsx_vslli_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 7}} ++ res |= __builtin_lsx_vslli_b(_1, 8); // expected-error {{argument value 8 is outside the valid range 0, 7}} ++ res |= __builtin_lsx_vslli_b(_1, var); // expected-error {{argument to '__builtin_lsx_vslli_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vslli_h(v8i16 _1, int var) { ++ v8i16 res = __builtin_lsx_vslli_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __builtin_lsx_vslli_h(_1, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __builtin_lsx_vslli_h(_1, var); // expected-error {{argument to '__builtin_lsx_vslli_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vslli_w(v4i32 _1, int var) { ++ v4i32 res = 
__builtin_lsx_vslli_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vslli_w(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vslli_w(_1, var); // expected-error {{argument to '__builtin_lsx_vslli_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vslli_d(v2i64 _1, int var) { ++ v2i64 res = __builtin_lsx_vslli_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= __builtin_lsx_vslli_d(_1, 64); // expected-error {{argument value 64 is outside the valid range 0, 63}} ++ res |= __builtin_lsx_vslli_d(_1, var); // expected-error {{argument to '__builtin_lsx_vslli_d' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vsrai_b(v16i8 _1, int var) { ++ v16i8 res = __builtin_lsx_vsrai_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 7}} ++ res |= __builtin_lsx_vsrai_b(_1, 8); // expected-error {{argument value 8 is outside the valid range 0, 7}} ++ res |= __builtin_lsx_vsrai_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsrai_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vsrai_h(v8i16 _1, int var) { ++ v8i16 res = __builtin_lsx_vsrai_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __builtin_lsx_vsrai_h(_1, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __builtin_lsx_vsrai_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsrai_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vsrai_w(v4i32 _1, int var) { ++ v4i32 res = __builtin_lsx_vsrai_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vsrai_w(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vsrai_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsrai_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vsrai_d(v2i64 _1, int var) { ++ v2i64 res = __builtin_lsx_vsrai_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= __builtin_lsx_vsrai_d(_1, 64); // expected-error {{argument value 64 is outside the valid range 0, 63}} ++ res |= __builtin_lsx_vsrai_d(_1, var); // expected-error {{argument to '__builtin_lsx_vsrai_d' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vsrari_b(v16i8 _1, int var) { ++ v16i8 res = __builtin_lsx_vsrari_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 7}} ++ res |= __builtin_lsx_vsrari_b(_1, 8); // expected-error {{argument value 8 is outside the valid range 0, 7}} ++ res |= __builtin_lsx_vsrari_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsrari_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vsrari_h(v8i16 _1, int var) { ++ v8i16 res = __builtin_lsx_vsrari_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __builtin_lsx_vsrari_h(_1, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __builtin_lsx_vsrari_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsrari_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vsrari_w(v4i32 _1, int var) { ++ v4i32 res = __builtin_lsx_vsrari_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= 
__builtin_lsx_vsrari_w(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vsrari_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsrari_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vsrari_d(v2i64 _1, int var) { ++ v2i64 res = __builtin_lsx_vsrari_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= __builtin_lsx_vsrari_d(_1, 64); // expected-error {{argument value 64 is outside the valid range 0, 63}} ++ res |= __builtin_lsx_vsrari_d(_1, var); // expected-error {{argument to '__builtin_lsx_vsrari_d' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vsrli_b(v16i8 _1, int var) { ++ v16i8 res = __builtin_lsx_vsrli_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 7}} ++ res |= __builtin_lsx_vsrli_b(_1, 8); // expected-error {{argument value 8 is outside the valid range 0, 7}} ++ res |= __builtin_lsx_vsrli_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsrli_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vsrli_h(v8i16 _1, int var) { ++ v8i16 res = __builtin_lsx_vsrli_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __builtin_lsx_vsrli_h(_1, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __builtin_lsx_vsrli_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsrli_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vsrli_w(v4i32 _1, int var) { ++ v4i32 res = __builtin_lsx_vsrli_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vsrli_w(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vsrli_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsrli_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vsrli_d(v2i64 _1, int var) { ++ v2i64 res = __builtin_lsx_vsrli_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= __builtin_lsx_vsrli_d(_1, 64); // expected-error {{argument value 64 is outside the valid range 0, 63}} ++ res |= __builtin_lsx_vsrli_d(_1, var); // expected-error {{argument to '__builtin_lsx_vsrli_d' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vsrlri_b(v16i8 _1, int var) { ++ v16i8 res = __builtin_lsx_vsrlri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 7}} ++ res |= __builtin_lsx_vsrlri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range 0, 7}} ++ res |= __builtin_lsx_vsrlri_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsrlri_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vsrlri_h(v8i16 _1, int var) { ++ v8i16 res = __builtin_lsx_vsrlri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __builtin_lsx_vsrlri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __builtin_lsx_vsrlri_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsrlri_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vsrlri_w(v4i32 _1, int var) { ++ v4i32 res = __builtin_lsx_vsrlri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vsrlri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= 
__builtin_lsx_vsrlri_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsrlri_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vsrlri_d(v2i64 _1, int var) { ++ v2i64 res = __builtin_lsx_vsrlri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= __builtin_lsx_vsrlri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range 0, 63}} ++ res |= __builtin_lsx_vsrlri_d(_1, var); // expected-error {{argument to '__builtin_lsx_vsrlri_d' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vbitclri_b(v16u8 _1, int var) { ++ v16u8 res = __builtin_lsx_vbitclri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 7}} ++ res |= __builtin_lsx_vbitclri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range 0, 7}} ++ res |= __builtin_lsx_vbitclri_b(_1, var); // expected-error {{argument to '__builtin_lsx_vbitclri_b' must be a constant integer}} ++ return res; ++} ++ ++v8u16 vbitclri_h(v8u16 _1, int var) { ++ v8u16 res = __builtin_lsx_vbitclri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __builtin_lsx_vbitclri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __builtin_lsx_vbitclri_h(_1, var); // expected-error {{argument to '__builtin_lsx_vbitclri_h' must be a constant integer}} ++ return res; ++} ++ ++v4u32 vbitclri_w(v4u32 _1, int var) { ++ v4u32 res = __builtin_lsx_vbitclri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vbitclri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vbitclri_w(_1, var); // expected-error {{argument to '__builtin_lsx_vbitclri_w' must be a constant integer}} ++ return res; ++} ++ ++v2u64 vbitclri_d(v2u64 _1, int var) { ++ v2u64 res = __builtin_lsx_vbitclri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= __builtin_lsx_vbitclri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range 0, 63}} ++ res |= __builtin_lsx_vbitclri_d(_1, var); // expected-error {{argument to '__builtin_lsx_vbitclri_d' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vbitseti_b(v16u8 _1, int var) { ++ v16u8 res = __builtin_lsx_vbitseti_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 7}} ++ res |= __builtin_lsx_vbitseti_b(_1, 8); // expected-error {{argument value 8 is outside the valid range 0, 7}} ++ res |= __builtin_lsx_vbitseti_b(_1, var); // expected-error {{argument to '__builtin_lsx_vbitseti_b' must be a constant integer}} ++ return res; ++} ++ ++v8u16 vbitseti_h(v8u16 _1, int var) { ++ v8u16 res = __builtin_lsx_vbitseti_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __builtin_lsx_vbitseti_h(_1, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __builtin_lsx_vbitseti_h(_1, var); // expected-error {{argument to '__builtin_lsx_vbitseti_h' must be a constant integer}} ++ return res; ++} ++ ++v4u32 vbitseti_w(v4u32 _1, int var) { ++ v4u32 res = __builtin_lsx_vbitseti_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vbitseti_w(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vbitseti_w(_1, var); // 
expected-error {{argument to '__builtin_lsx_vbitseti_w' must be a constant integer}} ++ return res; ++} ++ ++v2u64 vbitseti_d(v2u64 _1, int var) { ++ v2u64 res = __builtin_lsx_vbitseti_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= __builtin_lsx_vbitseti_d(_1, 64); // expected-error {{argument value 64 is outside the valid range 0, 63}} ++ res |= __builtin_lsx_vbitseti_d(_1, var); // expected-error {{argument to '__builtin_lsx_vbitseti_d' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vbitrevi_b(v16u8 _1, int var) { ++ v16u8 res = __builtin_lsx_vbitrevi_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 7}} ++ res |= __builtin_lsx_vbitrevi_b(_1, 8); // expected-error {{argument value 8 is outside the valid range 0, 7}} ++ res |= __builtin_lsx_vbitrevi_b(_1, var); // expected-error {{argument to '__builtin_lsx_vbitrevi_b' must be a constant integer}} ++ return res; ++} ++ ++v8u16 vbitrevi_h(v8u16 _1, int var) { ++ v8u16 res = __builtin_lsx_vbitrevi_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __builtin_lsx_vbitrevi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __builtin_lsx_vbitrevi_h(_1, var); // expected-error {{argument to '__builtin_lsx_vbitrevi_h' must be a constant integer}} ++ return res; ++} ++ ++v4u32 vbitrevi_w(v4u32 _1, int var) { ++ v4u32 res = __builtin_lsx_vbitrevi_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vbitrevi_w(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vbitrevi_w(_1, var); // expected-error {{argument to '__builtin_lsx_vbitrevi_w' must be a constant integer}} ++ return res; ++} ++ ++v2u64 vbitrevi_d(v2u64 _1, int var) { ++ v2u64 res = __builtin_lsx_vbitrevi_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= __builtin_lsx_vbitrevi_d(_1, 64); // expected-error {{argument value 64 is outside the valid range 0, 63}} ++ res |= __builtin_lsx_vbitrevi_d(_1, var); // expected-error {{argument to '__builtin_lsx_vbitrevi_d' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vaddi_bu(v16i8 _1, int var) { ++ v16i8 res = __builtin_lsx_vaddi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vaddi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vaddi_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vaddi_bu' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vaddi_hu(v8i16 _1, int var) { ++ v8i16 res = __builtin_lsx_vaddi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vaddi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vaddi_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vaddi_hu' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vaddi_wu(v4i32 _1, int var) { ++ v4i32 res = __builtin_lsx_vaddi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vaddi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vaddi_wu(_1, var); // expected-error {{argument to 
'__builtin_lsx_vaddi_wu' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vaddi_du(v2i64 _1, int var) { ++ v2i64 res = __builtin_lsx_vaddi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vaddi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vaddi_du(_1, var); // expected-error {{argument to '__builtin_lsx_vaddi_du' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vsubi_bu(v16i8 _1, int var) { ++ v16i8 res = __builtin_lsx_vsubi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vsubi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vsubi_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vsubi_bu' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vsubi_hu(v8i16 _1, int var) { ++ v8i16 res = __builtin_lsx_vsubi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vsubi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vsubi_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vsubi_hu' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vsubi_wu(v4i32 _1, int var) { ++ v4i32 res = __builtin_lsx_vsubi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vsubi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vsubi_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vsubi_wu' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vsubi_du(v2i64 _1, int var) { ++ v2i64 res = __builtin_lsx_vsubi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vsubi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vsubi_du(_1, var); // expected-error {{argument to '__builtin_lsx_vsubi_du' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vmaxi_b(v16i8 _1, int var) { ++ v16i8 res = __builtin_lsx_vmaxi_b(_1, -17); // expected-error {{argument value -17 is outside the valid range -16, 15}} ++ res |= __builtin_lsx_vmaxi_b(_1, 16); // expected-error {{argument value 16 is outside the valid range -16, 15}} ++ res |= __builtin_lsx_vmaxi_b(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vmaxi_h(v8i16 _1, int var) { ++ v8i16 res = __builtin_lsx_vmaxi_h(_1, -17); // expected-error {{argument value -17 is outside the valid range -16, 15}} ++ res |= __builtin_lsx_vmaxi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range -16, 15}} ++ res |= __builtin_lsx_vmaxi_h(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vmaxi_w(v4i32 _1, int var) { ++ v4i32 res = __builtin_lsx_vmaxi_w(_1, -17); // expected-error {{argument value -17 is outside the valid range -16, 15}} ++ res |= __builtin_lsx_vmaxi_w(_1, 16); // expected-error {{argument value 16 is outside the valid range -16, 15}} ++ res |= __builtin_lsx_vmaxi_w(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vmaxi_d(v2i64 _1, int var) { ++ 
v2i64 res = __builtin_lsx_vmaxi_d(_1, -17); // expected-error {{argument value -17 is outside the valid range -16, 15}} ++ res |= __builtin_lsx_vmaxi_d(_1, 16); // expected-error {{argument value 16 is outside the valid range -16, 15}} ++ res |= __builtin_lsx_vmaxi_d(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_d' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vmaxi_bu(v16u8 _1, int var) { ++ v16u8 res = __builtin_lsx_vmaxi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vmaxi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vmaxi_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_bu' must be a constant integer}} ++ return res; ++} ++ ++v8u16 vmaxi_hu(v8u16 _1, int var) { ++ v8u16 res = __builtin_lsx_vmaxi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vmaxi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vmaxi_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_hu' must be a constant integer}} ++ return res; ++} ++ ++v4u32 vmaxi_wu(v4u32 _1, int var) { ++ v4u32 res = __builtin_lsx_vmaxi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vmaxi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vmaxi_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_wu' must be a constant integer}} ++ return res; ++} ++ ++v2u64 vmaxi_du(v2u64 _1, int var) { ++ v2u64 res = __builtin_lsx_vmaxi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vmaxi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vmaxi_du(_1, var); // expected-error {{argument to '__builtin_lsx_vmaxi_du' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vmini_b(v16i8 _1, int var) { ++ v16i8 res = __builtin_lsx_vmini_b(_1, -17); // expected-error {{argument value -17 is outside the valid range -16, 15}} ++ res |= __builtin_lsx_vmini_b(_1, 16); // expected-error {{argument value 16 is outside the valid range -16, 15}} ++ res |= __builtin_lsx_vmini_b(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vmini_h(v8i16 _1, int var) { ++ v8i16 res = __builtin_lsx_vmini_h(_1, -17); // expected-error {{argument value -17 is outside the valid range -16, 15}} ++ res |= __builtin_lsx_vmini_h(_1, 16); // expected-error {{argument value 16 is outside the valid range -16, 15}} ++ res |= __builtin_lsx_vmini_h(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vmini_w(v4i32 _1, int var) { ++ v4i32 res = __builtin_lsx_vmini_w(_1, -17); // expected-error {{argument value -17 is outside the valid range -16, 15}} ++ res |= __builtin_lsx_vmini_w(_1, 16); // expected-error {{argument value 16 is outside the valid range -16, 15}} ++ res |= __builtin_lsx_vmini_w(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vmini_d(v2i64 _1, int var) { ++ v2i64 res = __builtin_lsx_vmini_d(_1, -17); // expected-error {{argument value -17 is outside the valid range -16, 15}} ++ 
res |= __builtin_lsx_vmini_d(_1, 16); // expected-error {{argument value 16 is outside the valid range -16, 15}} ++ res |= __builtin_lsx_vmini_d(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_d' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vmini_bu(v16u8 _1, int var) { ++ v16u8 res = __builtin_lsx_vmini_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vmini_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vmini_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_bu' must be a constant integer}} ++ return res; ++} ++ ++v8u16 vmini_hu(v8u16 _1, int var) { ++ v8u16 res = __builtin_lsx_vmini_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vmini_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vmini_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_hu' must be a constant integer}} ++ return res; ++} ++ ++v4u32 vmini_wu(v4u32 _1, int var) { ++ v4u32 res = __builtin_lsx_vmini_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vmini_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vmini_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_wu' must be a constant integer}} ++ return res; ++} ++ ++v2u64 vmini_du(v2u64 _1, int var) { ++ v2u64 res = __builtin_lsx_vmini_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vmini_du(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vmini_du(_1, var); // expected-error {{argument to '__builtin_lsx_vmini_du' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vseqi_b(v16i8 _1, int var) { ++ v16i8 res = __builtin_lsx_vseqi_b(_1, -17); // expected-error {{argument value -17 is outside the valid range -16, 15}} ++ res |= __builtin_lsx_vseqi_b(_1, 16); // expected-error {{argument value 16 is outside the valid range -16, 15}} ++ res |= __builtin_lsx_vseqi_b(_1, var); // expected-error {{argument to '__builtin_lsx_vseqi_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vseqi_h(v8i16 _1, int var) { ++ v8i16 res = __builtin_lsx_vseqi_h(_1, -17); // expected-error {{argument value -17 is outside the valid range -16, 15}} ++ res |= __builtin_lsx_vseqi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range -16, 15}} ++ res |= __builtin_lsx_vseqi_h(_1, var); // expected-error {{argument to '__builtin_lsx_vseqi_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vseqi_w(v4i32 _1, int var) { ++ v4i32 res = __builtin_lsx_vseqi_w(_1, -17); // expected-error {{argument value -17 is outside the valid range -16, 15}} ++ res |= __builtin_lsx_vseqi_w(_1, 16); // expected-error {{argument value 16 is outside the valid range -16, 15}} ++ res |= __builtin_lsx_vseqi_w(_1, var); // expected-error {{argument to '__builtin_lsx_vseqi_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vseqi_d(v2i64 _1, int var) { ++ v2i64 res = __builtin_lsx_vseqi_d(_1, -17); // expected-error {{argument value -17 is outside the valid range -16, 15}} ++ res |= __builtin_lsx_vseqi_d(_1, 16); // expected-error {{argument value 16 is outside the valid range -16, 15}} ++ res |= 
__builtin_lsx_vseqi_d(_1, var); // expected-error {{argument to '__builtin_lsx_vseqi_d' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vslti_b(v16i8 _1, int var) { ++ v16i8 res = __builtin_lsx_vslti_b(_1, -17); // expected-error {{argument value -17 is outside the valid range -16, 15}} ++ res |= __builtin_lsx_vslti_b(_1, 16); // expected-error {{argument value 16 is outside the valid range -16, 15}} ++ res |= __builtin_lsx_vslti_b(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vslti_h(v8i16 _1, int var) { ++ v8i16 res = __builtin_lsx_vslti_h(_1, -17); // expected-error {{argument value -17 is outside the valid range -16, 15}} ++ res |= __builtin_lsx_vslti_h(_1, 16); // expected-error {{argument value 16 is outside the valid range -16, 15}} ++ res |= __builtin_lsx_vslti_h(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vslti_w(v4i32 _1, int var) { ++ v4i32 res = __builtin_lsx_vslti_w(_1, -17); // expected-error {{argument value -17 is outside the valid range -16, 15}} ++ res |= __builtin_lsx_vslti_w(_1, 16); // expected-error {{argument value 16 is outside the valid range -16, 15}} ++ res |= __builtin_lsx_vslti_w(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vslti_d(v2i64 _1, int var) { ++ v2i64 res = __builtin_lsx_vslti_d(_1, -17); // expected-error {{argument value -17 is outside the valid range -16, 15}} ++ res |= __builtin_lsx_vslti_d(_1, 16); // expected-error {{argument value 16 is outside the valid range -16, 15}} ++ res |= __builtin_lsx_vslti_d(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_d' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vslti_bu(v16u8 _1, int var) { ++ v16i8 res = __builtin_lsx_vslti_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vslti_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vslti_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_bu' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vslti_hu(v8u16 _1, int var) { ++ v8i16 res = __builtin_lsx_vslti_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vslti_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vslti_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_hu' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vslti_wu(v4u32 _1, int var) { ++ v4i32 res = __builtin_lsx_vslti_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vslti_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vslti_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_wu' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vslti_du(v2u64 _1, int var) { ++ v2i64 res = __builtin_lsx_vslti_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vslti_du(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vslti_du(_1, var); // expected-error {{argument to '__builtin_lsx_vslti_du' must be a constant integer}} ++ return 
res; ++} ++ ++v16i8 vslei_b(v16i8 _1, int var) { ++ v16i8 res = __builtin_lsx_vslei_b(_1, -17); // expected-error {{argument value -17 is outside the valid range -16, 15}} ++ res |= __builtin_lsx_vslei_b(_1, 16); // expected-error {{argument value 16 is outside the valid range -16, 15}} ++ res |= __builtin_lsx_vslei_b(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vslei_h(v8i16 _1, int var) { ++ v8i16 res = __builtin_lsx_vslei_h(_1, -17); // expected-error {{argument value -17 is outside the valid range -16, 15}} ++ res |= __builtin_lsx_vslei_h(_1, 16); // expected-error {{argument value 16 is outside the valid range -16, 15}} ++ res |= __builtin_lsx_vslei_h(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vslei_w(v4i32 _1, int var) { ++ v4i32 res = __builtin_lsx_vslei_w(_1, -17); // expected-error {{argument value -17 is outside the valid range -16, 15}} ++ res |= __builtin_lsx_vslei_w(_1, 16); // expected-error {{argument value 16 is outside the valid range -16, 15}} ++ res |= __builtin_lsx_vslei_w(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vslei_d(v2i64 _1, int var) { ++ v2i64 res = __builtin_lsx_vslei_d(_1, -17); // expected-error {{argument value -17 is outside the valid range -16, 15}} ++ res |= __builtin_lsx_vslei_d(_1, 16); // expected-error {{argument value 16 is outside the valid range -16, 15}} ++ res |= __builtin_lsx_vslei_d(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_d' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vslei_bu(v16u8 _1, int var) { ++ v16i8 res = __builtin_lsx_vslei_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vslei_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vslei_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_bu' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vslei_hu(v8u16 _1, int var) { ++ v8i16 res = __builtin_lsx_vslei_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vslei_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vslei_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_hu' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vslei_wu(v4u32 _1, int var) { ++ v4i32 res = __builtin_lsx_vslei_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vslei_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vslei_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_wu' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vslei_du(v2u64 _1, int var) { ++ v2i64 res = __builtin_lsx_vslei_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vslei_du(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vslei_du(_1, var); // expected-error {{argument to '__builtin_lsx_vslei_du' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vsat_b(v16i8 _1, int var) { ++ v16i8 res = __builtin_lsx_vsat_b(_1, -1); // expected-error {{argument 
value 4294967295 is outside the valid range 0, 7}} ++ res |= __builtin_lsx_vsat_b(_1, 8); // expected-error {{argument value 8 is outside the valid range 0, 7}} ++ res |= __builtin_lsx_vsat_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vsat_h(v8i16 _1, int var) { ++ v8i16 res = __builtin_lsx_vsat_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __builtin_lsx_vsat_h(_1, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __builtin_lsx_vsat_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vsat_w(v4i32 _1, int var) { ++ v4i32 res = __builtin_lsx_vsat_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vsat_w(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vsat_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vsat_d(v2i64 _1, int var) { ++ v2i64 res = __builtin_lsx_vsat_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= __builtin_lsx_vsat_d(_1, 64); // expected-error {{argument value 64 is outside the valid range 0, 63}} ++ res |= __builtin_lsx_vsat_d(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_d' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vsat_bu(v16u8 _1, int var) { ++ v16u8 res = __builtin_lsx_vsat_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 7}} ++ res |= __builtin_lsx_vsat_bu(_1, 8); // expected-error {{argument value 8 is outside the valid range 0, 7}} ++ res |= __builtin_lsx_vsat_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_bu' must be a constant integer}} ++ return res; ++} ++ ++v8u16 vsat_hu(v8u16 _1, int var) { ++ v8u16 res = __builtin_lsx_vsat_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __builtin_lsx_vsat_hu(_1, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __builtin_lsx_vsat_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_hu' must be a constant integer}} ++ return res; ++} ++ ++v4u32 vsat_wu(v4u32 _1, int var) { ++ v4u32 res = __builtin_lsx_vsat_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vsat_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vsat_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_wu' must be a constant integer}} ++ return res; ++} ++ ++v2u64 vsat_du(v2u64 _1, int var) { ++ v2u64 res = __builtin_lsx_vsat_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= __builtin_lsx_vsat_du(_1, 64); // expected-error {{argument value 64 is outside the valid range 0, 63}} ++ res |= __builtin_lsx_vsat_du(_1, var); // expected-error {{argument to '__builtin_lsx_vsat_du' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vreplvei_b(v16i8 _1, int var) { ++ v16i8 res = __builtin_lsx_vreplvei_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __builtin_lsx_vreplvei_b(_1, 16); // expected-error {{argument value 16 is outside the valid 
range 0, 15}} ++ res |= __builtin_lsx_vreplvei_b(_1, var); // expected-error {{argument to '__builtin_lsx_vreplvei_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vreplvei_h(v8i16 _1, int var) { ++ v8i16 res = __builtin_lsx_vreplvei_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 7}} ++ res |= __builtin_lsx_vreplvei_h(_1, 8); // expected-error {{argument value 8 is outside the valid range 0, 7}} ++ res |= __builtin_lsx_vreplvei_h(_1, var); // expected-error {{argument to '__builtin_lsx_vreplvei_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vreplvei_w(v4i32 _1, int var) { ++ v4i32 res = __builtin_lsx_vreplvei_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 3}} ++ res |= __builtin_lsx_vreplvei_w(_1, 4); // expected-error {{argument value 4 is outside the valid range 0, 3}} ++ res |= __builtin_lsx_vreplvei_w(_1, var); // expected-error {{argument to '__builtin_lsx_vreplvei_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vreplvei_d(v2i64 _1, int var) { ++ v2i64 res = __builtin_lsx_vreplvei_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 1}} ++ res |= __builtin_lsx_vreplvei_d(_1, 2); // expected-error {{argument value 2 is outside the valid range 0, 1}} ++ res |= __builtin_lsx_vreplvei_d(_1, var); // expected-error {{argument to '__builtin_lsx_vreplvei_d' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vandi_b(v16u8 _1, int var) { ++ v16u8 res = __builtin_lsx_vandi_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 255}} ++ res |= __builtin_lsx_vandi_b(_1, 256); // expected-error {{argument value 256 is outside the valid range 0, 255}} ++ res |= __builtin_lsx_vandi_b(_1, var); // expected-error {{argument to '__builtin_lsx_vandi_b' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vori_b(v16u8 _1, int var) { ++ v16u8 res = __builtin_lsx_vori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 255}} ++ res |= __builtin_lsx_vori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range 0, 255}} ++ res |= __builtin_lsx_vori_b(_1, var); // expected-error {{argument to '__builtin_lsx_vori_b' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vnori_b(v16u8 _1, int var) { ++ v16u8 res = __builtin_lsx_vnori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 255}} ++ res |= __builtin_lsx_vnori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range 0, 255}} ++ res |= __builtin_lsx_vnori_b(_1, var); // expected-error {{argument to '__builtin_lsx_vnori_b' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vxori_b(v16u8 _1, int var) { ++ v16u8 res = __builtin_lsx_vxori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 255}} ++ res |= __builtin_lsx_vxori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range 0, 255}} ++ res |= __builtin_lsx_vxori_b(_1, var); // expected-error {{argument to '__builtin_lsx_vxori_b' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vbitseli_b(v16u8 _1, v16u8 _2, int var) { ++ v16u8 res = __builtin_lsx_vbitseli_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 255}} ++ res |= __builtin_lsx_vbitseli_b(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range 0, 255}} ++ res |= __builtin_lsx_vbitseli_b(_1, _2, 
var); // expected-error {{argument to '__builtin_lsx_vbitseli_b' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vshuf4i_b(v16i8 _1, int var) { ++ v16i8 res = __builtin_lsx_vshuf4i_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 255}} ++ res |= __builtin_lsx_vshuf4i_b(_1, 256); // expected-error {{argument value 256 is outside the valid range 0, 255}} ++ res |= __builtin_lsx_vshuf4i_b(_1, var); // expected-error {{argument to '__builtin_lsx_vshuf4i_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vshuf4i_h(v8i16 _1, int var) { ++ v8i16 res = __builtin_lsx_vshuf4i_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 255}} ++ res |= __builtin_lsx_vshuf4i_h(_1, 256); // expected-error {{argument value 256 is outside the valid range 0, 255}} ++ res |= __builtin_lsx_vshuf4i_h(_1, var); // expected-error {{argument to '__builtin_lsx_vshuf4i_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vshuf4i_w(v4i32 _1, int var) { ++ v4i32 res = __builtin_lsx_vshuf4i_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 255}} ++ res |= __builtin_lsx_vshuf4i_w(_1, 256); // expected-error {{argument value 256 is outside the valid range 0, 255}} ++ res |= __builtin_lsx_vshuf4i_w(_1, var); // expected-error {{argument to '__builtin_lsx_vshuf4i_w' must be a constant integer}} ++ return res; ++} ++ ++int vpickve2gr_b(v16i8 _1, int var) { ++ int res = __builtin_lsx_vpickve2gr_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __builtin_lsx_vpickve2gr_b(_1, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __builtin_lsx_vpickve2gr_b(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_b' must be a constant integer}} ++ return res; ++} ++ ++int vpickve2gr_h(v8i16 _1, int var) { ++ int res = __builtin_lsx_vpickve2gr_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 7}} ++ res |= __builtin_lsx_vpickve2gr_h(_1, 8); // expected-error {{argument value 8 is outside the valid range 0, 7}} ++ res |= __builtin_lsx_vpickve2gr_h(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_h' must be a constant integer}} ++ return res; ++} ++ ++int vpickve2gr_w(v4i32 _1, int var) { ++ int res = __builtin_lsx_vpickve2gr_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 3}} ++ res |= __builtin_lsx_vpickve2gr_w(_1, 4); // expected-error {{argument value 4 is outside the valid range 0, 3}} ++ res |= __builtin_lsx_vpickve2gr_w(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_w' must be a constant integer}} ++ return res; ++} ++ ++long vpickve2gr_d(v2i64 _1, int var) { ++ long res = __builtin_lsx_vpickve2gr_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 1}} ++ res |= __builtin_lsx_vpickve2gr_d(_1, 2); // expected-error {{argument value 2 is outside the valid range 0, 1}} ++ res |= __builtin_lsx_vpickve2gr_d(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_d' must be a constant integer}} ++ return res; ++} ++ ++unsigned int vpickve2gr_bu(v16i8 _1, int var) { ++ unsigned int res = __builtin_lsx_vpickve2gr_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __builtin_lsx_vpickve2gr_bu(_1, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= 
__builtin_lsx_vpickve2gr_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_bu' must be a constant integer}} ++ return res; ++} ++ ++unsigned int vpickve2gr_hu(v8i16 _1, int var) { ++ unsigned int res = __builtin_lsx_vpickve2gr_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 7}} ++ res |= __builtin_lsx_vpickve2gr_hu(_1, 8); // expected-error {{argument value 8 is outside the valid range 0, 7}} ++ res |= __builtin_lsx_vpickve2gr_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_hu' must be a constant integer}} ++ return res; ++} ++ ++unsigned int vpickve2gr_wu(v4i32 _1, int var) { ++ unsigned int res = __builtin_lsx_vpickve2gr_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 3}} ++ res |= __builtin_lsx_vpickve2gr_wu(_1, 4); // expected-error {{argument value 4 is outside the valid range 0, 3}} ++ res |= __builtin_lsx_vpickve2gr_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_wu' must be a constant integer}} ++ return res; ++} ++ ++unsigned long int vpickve2gr_du(v2i64 _1, int var) { ++ unsigned long int res = __builtin_lsx_vpickve2gr_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 1}} ++ res |= __builtin_lsx_vpickve2gr_du(_1, 2); // expected-error {{argument value 2 is outside the valid range 0, 1}} ++ res |= __builtin_lsx_vpickve2gr_du(_1, var); // expected-error {{argument to '__builtin_lsx_vpickve2gr_du' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vinsgr2vr_b(v16i8 _1, int var) { ++ v16i8 res = __builtin_lsx_vinsgr2vr_b(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __builtin_lsx_vinsgr2vr_b(_1, 1, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __builtin_lsx_vinsgr2vr_b(_1, 1, var); // expected-error {{argument to '__builtin_lsx_vinsgr2vr_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vinsgr2vr_h(v8i16 _1, int var) { ++ v8i16 res = __builtin_lsx_vinsgr2vr_h(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 7}} ++ res |= __builtin_lsx_vinsgr2vr_h(_1, 1, 8); // expected-error {{argument value 8 is outside the valid range 0, 7}} ++ res |= __builtin_lsx_vinsgr2vr_h(_1, 1, var); // expected-error {{argument to '__builtin_lsx_vinsgr2vr_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vinsgr2vr_w(v4i32 _1, int var) { ++ v4i32 res = __builtin_lsx_vinsgr2vr_w(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 3}} ++ res |= __builtin_lsx_vinsgr2vr_w(_1, 1, 4); // expected-error {{argument value 4 is outside the valid range 0, 3}} ++ res |= __builtin_lsx_vinsgr2vr_w(_1, 1, var); // expected-error {{argument to '__builtin_lsx_vinsgr2vr_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vinsgr2vr_d(v2i64 _1, int var) { ++ v2i64 res = __builtin_lsx_vinsgr2vr_d(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 1}} ++ res |= __builtin_lsx_vinsgr2vr_d(_1, 1, 2); // expected-error {{argument value 2 is outside the valid range 0, 1}} ++ res |= __builtin_lsx_vinsgr2vr_d(_1, 1, var); // expected-error {{argument to '__builtin_lsx_vinsgr2vr_d' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vsllwil_h_b(v16i8 _1, int var) { ++ v8i16 res = __builtin_lsx_vsllwil_h_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 7}} ++ res |= 
__builtin_lsx_vsllwil_h_b(_1, 8); // expected-error {{argument value 8 is outside the valid range 0, 7}} ++ res |= __builtin_lsx_vsllwil_h_b(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_h_b' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vsllwil_w_h(v8i16 _1, int var) { ++ v4i32 res = __builtin_lsx_vsllwil_w_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __builtin_lsx_vsllwil_w_h(_1, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __builtin_lsx_vsllwil_w_h(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_w_h' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vsllwil_d_w(v4i32 _1, int var) { ++ v2i64 res = __builtin_lsx_vsllwil_d_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vsllwil_d_w(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vsllwil_d_w(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_d_w' must be a constant integer}} ++ return res; ++} ++ ++v8u16 vsllwil_hu_bu(v16u8 _1, int var) { ++ v8u16 res = __builtin_lsx_vsllwil_hu_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 7}} ++ res |= __builtin_lsx_vsllwil_hu_bu(_1, 8); // expected-error {{argument value 8 is outside the valid range 0, 7}} ++ res |= __builtin_lsx_vsllwil_hu_bu(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_hu_bu' must be a constant integer}} ++ return res; ++} ++ ++v4u32 vsllwil_wu_hu(v8u16 _1, int var) { ++ v4u32 res = __builtin_lsx_vsllwil_wu_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __builtin_lsx_vsllwil_wu_hu(_1, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __builtin_lsx_vsllwil_wu_hu(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_wu_hu' must be a constant integer}} ++ return res; ++} ++ ++v2u64 vsllwil_du_wu(v4u32 _1, int var) { ++ v2u64 res = __builtin_lsx_vsllwil_du_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vsllwil_du_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vsllwil_du_wu(_1, var); // expected-error {{argument to '__builtin_lsx_vsllwil_du_wu' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vfrstpi_b(v16i8 _1, v16i8 _2, int var) { ++ v16i8 res = __builtin_lsx_vfrstpi_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vfrstpi_b(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vfrstpi_b(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vfrstpi_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vfrstpi_h(v8i16 _1, v8i16 _2, int var) { ++ v8i16 res = __builtin_lsx_vfrstpi_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vfrstpi_h(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vfrstpi_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vfrstpi_h' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vshuf4i_d(v2i64 _1, v2i64 _2, int var) { ++ v2i64 res = __builtin_lsx_vshuf4i_d(_1, _2, -1); // 
expected-error {{argument value 4294967295 is outside the valid range 0, 255}} ++ res |= __builtin_lsx_vshuf4i_d(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range 0, 255}} ++ res |= __builtin_lsx_vshuf4i_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vshuf4i_d' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vbsrl_v(v16i8 _1, int var) { ++ v16i8 res = __builtin_lsx_vbsrl_v(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vbsrl_v(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vbsrl_v(_1, var); // expected-error {{argument to '__builtin_lsx_vbsrl_v' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vbsll_v(v16i8 _1, int var) { ++ v16i8 res = __builtin_lsx_vbsll_v(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vbsll_v(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vbsll_v(_1, var); // expected-error {{argument to '__builtin_lsx_vbsll_v' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vextrins_b(v16i8 _1, v16i8 _2, int var) { ++ v16i8 res = __builtin_lsx_vextrins_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 255}} ++ res |= __builtin_lsx_vextrins_b(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range 0, 255}} ++ res |= __builtin_lsx_vextrins_b(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vextrins_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vextrins_h(v8i16 _1, v8i16 _2, int var) { ++ v8i16 res = __builtin_lsx_vextrins_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 255}} ++ res |= __builtin_lsx_vextrins_h(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range 0, 255}} ++ res |= __builtin_lsx_vextrins_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vextrins_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vextrins_w(v4i32 _1, v4i32 _2, int var) { ++ v4i32 res = __builtin_lsx_vextrins_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 255}} ++ res |= __builtin_lsx_vextrins_w(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range 0, 255}} ++ res |= __builtin_lsx_vextrins_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vextrins_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vextrins_d(v2i64 _1, v2i64 _2, int var) { ++ v2i64 res = __builtin_lsx_vextrins_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 255}} ++ res |= __builtin_lsx_vextrins_d(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range 0, 255}} ++ res |= __builtin_lsx_vextrins_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vextrins_d' must be a constant integer}} ++ return res; ++} ++ ++void vstelm_b_idx(v16i8 _1, void *_2, int var) { ++ __builtin_lsx_vstelm_b(_1, _2, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ __builtin_lsx_vstelm_b(_1, _2, 1, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ __builtin_lsx_vstelm_b(_1, _2, 1, var); // expected-error {{argument to '__builtin_lsx_vstelm_b' must be a constant integer}} ++} ++ ++void vstelm_h_idx(v8i16 _1, void *_2, int 
var) { ++ __builtin_lsx_vstelm_h(_1, _2, 2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 7}} ++ __builtin_lsx_vstelm_h(_1, _2, 2, 8); // expected-error {{argument value 8 is outside the valid range 0, 7}} ++ __builtin_lsx_vstelm_h(_1, _2, 2, var); // expected-error {{argument to '__builtin_lsx_vstelm_h' must be a constant integer}} ++} ++ ++void vstelm_w_idx(v4i32 _1, void *_2, int var) { ++ __builtin_lsx_vstelm_w(_1, _2, 4, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 3}} ++ __builtin_lsx_vstelm_w(_1, _2, 4, 4); // expected-error {{argument value 4 is outside the valid range 0, 3}} ++ __builtin_lsx_vstelm_w(_1, _2, 4, var); // expected-error {{argument to '__builtin_lsx_vstelm_w' must be a constant integer}} ++} ++ ++void vstelm_d_idx(v2i64 _1, void *_2, int var) { ++ __builtin_lsx_vstelm_d(_1, _2, 8, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 1}} ++ __builtin_lsx_vstelm_d(_1, _2, 8, 2); // expected-error {{argument value 2 is outside the valid range 0, 1}} ++ __builtin_lsx_vstelm_d(_1, _2, 8, var); // expected-error {{argument to '__builtin_lsx_vstelm_d' must be a constant integer}} ++} ++ ++void vstelm_b(v16i8 _1, void *_2, int var) { ++ __builtin_lsx_vstelm_b(_1, _2, -129, 1); // expected-error {{argument value -129 is outside the valid range -128, 127}} ++ __builtin_lsx_vstelm_b(_1, _2, 128, 1); // expected-error {{argument value 128 is outside the valid range -128, 127}} ++ __builtin_lsx_vstelm_b(_1, _2, var, 1); // expected-error {{argument to '__builtin_lsx_vstelm_b' must be a constant integer}} ++} ++ ++void vstelm_h(v8i16 _1, void *_2, int var) { ++ __builtin_lsx_vstelm_h(_1, _2, -258, 1); // expected-error {{argument value -258 is outside the valid range -256, 254}} ++ __builtin_lsx_vstelm_h(_1, _2, 256, 1); // expected-error {{argument value 256 is outside the valid range -256, 254}} ++ __builtin_lsx_vstelm_h(_1, _2, var, 1); // expected-error {{argument to '__builtin_lsx_vstelm_h' must be a constant integer}} ++} ++ ++void vstelm_w(v4i32 _1, void *_2, int var) { ++ __builtin_lsx_vstelm_w(_1, _2, -516, 1); // expected-error {{argument value -516 is outside the valid range -512, 508}} ++ __builtin_lsx_vstelm_w(_1, _2, 512, 1); // expected-error {{argument value 512 is outside the valid range -512, 508}} ++ __builtin_lsx_vstelm_w(_1, _2, var, 1); // expected-error {{argument to '__builtin_lsx_vstelm_w' must be a constant integer}} ++} ++ ++void vstelm_d(v2i64 _1, void *_2, int var) { ++ __builtin_lsx_vstelm_d(_1, _2, -1032, 1); // expected-error {{argument value -1032 is outside the valid range -1024, 1016}} ++ __builtin_lsx_vstelm_d(_1, _2, 1024, 1); // expected-error {{argument value 1024 is outside the valid range -1024, 1016}} ++ __builtin_lsx_vstelm_d(_1, _2, var, 1); // expected-error {{argument to '__builtin_lsx_vstelm_d' must be a constant integer}} ++} ++ ++v16i8 vldrepl_b(void *_1, int var) { ++ v16i8 res = __builtin_lsx_vldrepl_b(_1, -2049); // expected-error {{argument value -2049 is outside the valid range -2048, 2047}} ++ res |= __builtin_lsx_vldrepl_b(_1, 2048); // expected-error {{argument value 2048 is outside the valid range -2048, 2047}} ++ res |= __builtin_lsx_vldrepl_b(_1, var); // expected-error {{argument to '__builtin_lsx_vldrepl_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vldrepl_h(void *_1, int var) { ++ v8i16 res = __builtin_lsx_vldrepl_h(_1, -2050); // expected-error {{argument value -2050 is outside the valid range -2048, 
2046}} ++ res |= __builtin_lsx_vldrepl_h(_1, 2048); // expected-error {{argument value 2048 is outside the valid range -2048, 2046}} ++ res |= __builtin_lsx_vldrepl_h(_1, var); // expected-error {{argument to '__builtin_lsx_vldrepl_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vldrepl_w(void *_1, int var) { ++ v4i32 res = __builtin_lsx_vldrepl_w(_1, -2052); // expected-error {{argument value -2052 is outside the valid range -2048, 2044}} ++ res |= __builtin_lsx_vldrepl_w(_1, 2048); // expected-error {{argument value 2048 is outside the valid range -2048, 2044}} ++ res |= __builtin_lsx_vldrepl_w(_1, var); // expected-error {{argument to '__builtin_lsx_vldrepl_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vldrepl_d(void *_1, int var) { ++ v2i64 res = __builtin_lsx_vldrepl_d(_1, -2056); // expected-error {{argument value -2056 is outside the valid range -2048, 2040}} ++ res |= __builtin_lsx_vldrepl_d(_1, 2048); // expected-error {{argument value 2048 is outside the valid range -2048, 2040}} ++ res |= __builtin_lsx_vldrepl_d(_1, var); // expected-error {{argument to '__builtin_lsx_vldrepl_d' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vrotri_b(v16i8 _1, int var) { ++ v16i8 res = __builtin_lsx_vrotri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 7}} ++ res |= __builtin_lsx_vrotri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range 0, 7}} ++ res |= __builtin_lsx_vrotri_b(_1, var); // expected-error {{argument to '__builtin_lsx_vrotri_b' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vrotri_h(v8i16 _1, int var) { ++ v8i16 res = __builtin_lsx_vrotri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __builtin_lsx_vrotri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __builtin_lsx_vrotri_h(_1, var); // expected-error {{argument to '__builtin_lsx_vrotri_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vrotri_w(v4i32 _1, int var) { ++ v4i32 res = __builtin_lsx_vrotri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vrotri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vrotri_w(_1, var); // expected-error {{argument to '__builtin_lsx_vrotri_w' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vrotri_d(v2i64 _1, int var) { ++ v2i64 res = __builtin_lsx_vrotri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= __builtin_lsx_vrotri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range 0, 63}} ++ res |= __builtin_lsx_vrotri_d(_1, var); // expected-error {{argument to '__builtin_lsx_vrotri_d' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vsrlni_b_h(v16i8 _1, v16i8 _2, int var) { ++ v16i8 res = __builtin_lsx_vsrlni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __builtin_lsx_vsrlni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __builtin_lsx_vsrlni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlni_b_h' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vsrlni_h_w(v8i16 _1, v8i16 _2, int var) { ++ v8i16 res = __builtin_lsx_vsrlni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 
31}} ++ res |= __builtin_lsx_vsrlni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vsrlni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlni_h_w' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vsrlni_w_d(v4i32 _1, v4i32 _2, int var) { ++ v4i32 res = __builtin_lsx_vsrlni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= __builtin_lsx_vsrlni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range 0, 63}} ++ res |= __builtin_lsx_vsrlni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlni_w_d' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vsrlni_d_q(v2i64 _1, v2i64 _2, int var) { ++ v2i64 res = __builtin_lsx_vsrlni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 127}} ++ res |= __builtin_lsx_vsrlni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range 0, 127}} ++ res |= __builtin_lsx_vsrlni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlni_d_q' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vsrlrni_b_h(v16i8 _1, v16i8 _2, int var) { ++ v16i8 res = __builtin_lsx_vsrlrni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __builtin_lsx_vsrlrni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __builtin_lsx_vsrlrni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlrni_b_h' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vsrlrni_h_w(v8i16 _1, v8i16 _2, int var) { ++ v8i16 res = __builtin_lsx_vsrlrni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vsrlrni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vsrlrni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlrni_h_w' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vsrlrni_w_d(v4i32 _1, v4i32 _2, int var) { ++ v4i32 res = __builtin_lsx_vsrlrni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= __builtin_lsx_vsrlrni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range 0, 63}} ++ res |= __builtin_lsx_vsrlrni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlrni_w_d' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vsrlrni_d_q(v2i64 _1, v2i64 _2, int var) { ++ v2i64 res = __builtin_lsx_vsrlrni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 127}} ++ res |= __builtin_lsx_vsrlrni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range 0, 127}} ++ res |= __builtin_lsx_vsrlrni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrlrni_d_q' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vssrlni_b_h(v16i8 _1, v16i8 _2, int var) { ++ v16i8 res = __builtin_lsx_vssrlni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __builtin_lsx_vssrlni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __builtin_lsx_vssrlni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_b_h' must be a constant integer}} ++ return 
res; ++} ++ ++v8i16 vssrlni_h_w(v8i16 _1, v8i16 _2, int var) { ++ v8i16 res = __builtin_lsx_vssrlni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vssrlni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vssrlni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_h_w' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vssrlni_w_d(v4i32 _1, v4i32 _2, int var) { ++ v4i32 res = __builtin_lsx_vssrlni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= __builtin_lsx_vssrlni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range 0, 63}} ++ res |= __builtin_lsx_vssrlni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_w_d' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vssrlni_d_q(v2i64 _1, v2i64 _2, int var) { ++ v2i64 res = __builtin_lsx_vssrlni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 127}} ++ res |= __builtin_lsx_vssrlni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range 0, 127}} ++ res |= __builtin_lsx_vssrlni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_d_q' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vssrlni_bu_h(v16u8 _1, v16i8 _2, int var) { ++ v16u8 res = __builtin_lsx_vssrlni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __builtin_lsx_vssrlni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __builtin_lsx_vssrlni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_bu_h' must be a constant integer}} ++ return res; ++} ++ ++v8u16 vssrlni_hu_w(v8u16 _1, v8i16 _2, int var) { ++ v8u16 res = __builtin_lsx_vssrlni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vssrlni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vssrlni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_hu_w' must be a constant integer}} ++ return res; ++} ++ ++v4u32 vssrlni_wu_d(v4u32 _1, v4i32 _2, int var) { ++ v4u32 res = __builtin_lsx_vssrlni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= __builtin_lsx_vssrlni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range 0, 63}} ++ res |= __builtin_lsx_vssrlni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_wu_d' must be a constant integer}} ++ return res; ++} ++ ++v2u64 vssrlni_du_q(v2u64 _1, v2i64 _2, int var) { ++ v2u64 res = __builtin_lsx_vssrlni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 127}} ++ res |= __builtin_lsx_vssrlni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range 0, 127}} ++ res |= __builtin_lsx_vssrlni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlni_du_q' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vssrlrni_b_h(v16i8 _1, v16i8 _2, int var) { ++ v16i8 res = __builtin_lsx_vssrlrni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __builtin_lsx_vssrlrni_b_h(_1, 
_2, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __builtin_lsx_vssrlrni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_b_h' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vssrlrni_h_w(v8i16 _1, v8i16 _2, int var) { ++ v8i16 res = __builtin_lsx_vssrlrni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vssrlrni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vssrlrni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_h_w' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vssrlrni_w_d(v4i32 _1, v4i32 _2, int var) { ++ v4i32 res = __builtin_lsx_vssrlrni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= __builtin_lsx_vssrlrni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range 0, 63}} ++ res |= __builtin_lsx_vssrlrni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_w_d' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vssrlrni_d_q(v2i64 _1, v2i64 _2, int var) { ++ v2i64 res = __builtin_lsx_vssrlrni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 127}} ++ res |= __builtin_lsx_vssrlrni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range 0, 127}} ++ res |= __builtin_lsx_vssrlrni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_d_q' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vssrlrni_bu_h(v16u8 _1, v16i8 _2, int var) { ++ v16u8 res = __builtin_lsx_vssrlrni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __builtin_lsx_vssrlrni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __builtin_lsx_vssrlrni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_bu_h' must be a constant integer}} ++ return res; ++} ++ ++v8u16 vssrlrni_hu_w(v8u16 _1, v8i16 _2, int var) { ++ v8u16 res = __builtin_lsx_vssrlrni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vssrlrni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vssrlrni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_hu_w' must be a constant integer}} ++ return res; ++} ++ ++v4u32 vssrlrni_wu_d(v4u32 _1, v4i32 _2, int var) { ++ v4u32 res = __builtin_lsx_vssrlrni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= __builtin_lsx_vssrlrni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range 0, 63}} ++ res |= __builtin_lsx_vssrlrni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_wu_d' must be a constant integer}} ++ return res; ++} ++ ++v2u64 vssrlrni_du_q(v2u64 _1, v2i64 _2, int var) { ++ v2u64 res = __builtin_lsx_vssrlrni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 127}} ++ res |= __builtin_lsx_vssrlrni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range 0, 127}} ++ res |= __builtin_lsx_vssrlrni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrlrni_du_q' must be a 
constant integer}} ++ return res; ++} ++ ++v16i8 vsrani_b_h(v16i8 _1, v16i8 _2, int var) { ++ v16i8 res = __builtin_lsx_vsrani_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __builtin_lsx_vsrani_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __builtin_lsx_vsrani_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrani_b_h' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vsrani_h_w(v8i16 _1, v8i16 _2, int var) { ++ v8i16 res = __builtin_lsx_vsrani_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vsrani_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vsrani_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrani_h_w' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vsrani_w_d(v4i32 _1, v4i32 _2, int var) { ++ v4i32 res = __builtin_lsx_vsrani_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= __builtin_lsx_vsrani_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range 0, 63}} ++ res |= __builtin_lsx_vsrani_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrani_w_d' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vsrani_d_q(v2i64 _1, v2i64 _2, int var) { ++ v2i64 res = __builtin_lsx_vsrani_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 127}} ++ res |= __builtin_lsx_vsrani_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range 0, 127}} ++ res |= __builtin_lsx_vsrani_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrani_d_q' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vsrarni_b_h(v16i8 _1, v16i8 _2, int var) { ++ v16i8 res = __builtin_lsx_vsrarni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __builtin_lsx_vsrarni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __builtin_lsx_vsrarni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrarni_b_h' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vsrarni_h_w(v8i16 _1, v8i16 _2, int var) { ++ v8i16 res = __builtin_lsx_vsrarni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vsrarni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vsrarni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrarni_h_w' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vsrarni_w_d(v4i32 _1, v4i32 _2, int var) { ++ v4i32 res = __builtin_lsx_vsrarni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= __builtin_lsx_vsrarni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range 0, 63}} ++ res |= __builtin_lsx_vsrarni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrarni_w_d' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vsrarni_d_q(v2i64 _1, v2i64 _2, int var) { ++ v2i64 res = __builtin_lsx_vsrarni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 127}} ++ res |= __builtin_lsx_vsrarni_d_q(_1, _2, 128); // 
expected-error {{argument value 128 is outside the valid range 0, 127}} ++ res |= __builtin_lsx_vsrarni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vsrarni_d_q' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vssrani_b_h(v16i8 _1, v16i8 _2, int var) { ++ v16i8 res = __builtin_lsx_vssrani_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __builtin_lsx_vssrani_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __builtin_lsx_vssrani_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_b_h' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vssrani_h_w(v8i16 _1, v8i16 _2, int var) { ++ v8i16 res = __builtin_lsx_vssrani_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vssrani_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vssrani_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_h_w' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vssrani_w_d(v4i32 _1, v4i32 _2, int var) { ++ v4i32 res = __builtin_lsx_vssrani_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= __builtin_lsx_vssrani_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range 0, 63}} ++ res |= __builtin_lsx_vssrani_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_w_d' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vssrani_d_q(v2i64 _1, v2i64 _2, int var) { ++ v2i64 res = __builtin_lsx_vssrani_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 127}} ++ res |= __builtin_lsx_vssrani_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range 0, 127}} ++ res |= __builtin_lsx_vssrani_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_d_q' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vssrani_bu_h(v16u8 _1, v16i8 _2, int var) { ++ v16u8 res = __builtin_lsx_vssrani_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __builtin_lsx_vssrani_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __builtin_lsx_vssrani_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_bu_h' must be a constant integer}} ++ return res; ++} ++ ++v8u16 vssrani_hu_w(v8u16 _1, v8i16 _2, int var) { ++ v8u16 res = __builtin_lsx_vssrani_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vssrani_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vssrani_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_hu_w' must be a constant integer}} ++ return res; ++} ++ ++v4u32 vssrani_wu_d(v4u32 _1, v4i32 _2, int var) { ++ v4u32 res = __builtin_lsx_vssrani_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= __builtin_lsx_vssrani_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range 0, 63}} ++ res |= __builtin_lsx_vssrani_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_wu_d' must be a constant integer}} ++ return res; ++} ++ ++v2u64 
vssrani_du_q(v2u64 _1, v2i64 _2, int var) { ++ v2u64 res = __builtin_lsx_vssrani_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 127}} ++ res |= __builtin_lsx_vssrani_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range 0, 127}} ++ res |= __builtin_lsx_vssrani_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrani_du_q' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vssrarni_b_h(v16i8 _1, v16i8 _2, int var) { ++ v16i8 res = __builtin_lsx_vssrarni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __builtin_lsx_vssrarni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __builtin_lsx_vssrarni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_b_h' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vssrarni_h_w(v8i16 _1, v8i16 _2, int var) { ++ v8i16 res = __builtin_lsx_vssrarni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vssrarni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vssrarni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_h_w' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vssrarni_w_d(v4i32 _1, v4i32 _2, int var) { ++ v4i32 res = __builtin_lsx_vssrarni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= __builtin_lsx_vssrarni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range 0, 63}} ++ res |= __builtin_lsx_vssrarni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_w_d' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vssrarni_d_q(v2i64 _1, v2i64 _2, int var) { ++ v2i64 res = __builtin_lsx_vssrarni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 127}} ++ res |= __builtin_lsx_vssrarni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range 0, 127}} ++ res |= __builtin_lsx_vssrarni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_d_q' must be a constant integer}} ++ return res; ++} ++ ++v16u8 vssrarni_bu_h(v16u8 _1, v16i8 _2, int var) { ++ v16u8 res = __builtin_lsx_vssrarni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __builtin_lsx_vssrarni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __builtin_lsx_vssrarni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_bu_h' must be a constant integer}} ++ return res; ++} ++ ++v8u16 vssrarni_hu_w(v8u16 _1, v8i16 _2, int var) { ++ v8u16 res = __builtin_lsx_vssrarni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vssrarni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lsx_vssrarni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_hu_w' must be a constant integer}} ++ return res; ++} ++ ++v4u32 vssrarni_wu_d(v4u32 _1, v4i32 _2, int var) { ++ v4u32 res = __builtin_lsx_vssrarni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= 
__builtin_lsx_vssrarni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range 0, 63}} ++ res |= __builtin_lsx_vssrarni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_wu_d' must be a constant integer}} ++ return res; ++} ++ ++v2u64 vssrarni_du_q(v2u64 _1, v2i64 _2, int var) { ++ v2u64 res = __builtin_lsx_vssrarni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 127}} ++ res |= __builtin_lsx_vssrarni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range 0, 127}} ++ res |= __builtin_lsx_vssrarni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vssrarni_du_q' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vpermi_w(v4i32 _1, v4i32 _2, int var) { ++ v4i32 res = __builtin_lsx_vpermi_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 255}} ++ res |= __builtin_lsx_vpermi_w(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range 0, 255}} ++ res |= __builtin_lsx_vpermi_w(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vpermi_w' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vld(void *_1, int var) { ++ v16i8 res = __builtin_lsx_vld(_1, -2049); // expected-error {{argument value -2049 is outside the valid range -2048, 2047}} ++ res |= __builtin_lsx_vld(_1, 2048); // expected-error {{argument value 2048 is outside the valid range -2048, 2047}} ++ res |= __builtin_lsx_vld(_1, var); // expected-error {{argument to '__builtin_lsx_vld' must be a constant integer}} ++ return res; ++} ++ ++void vst(v16i8 _1, void *_2, int var) { ++ __builtin_lsx_vst(_1, _2, -2049); // expected-error {{argument value -2049 is outside the valid range -2048, 2047}} ++ __builtin_lsx_vst(_1, _2, 2048); // expected-error {{argument value 2048 is outside the valid range -2048, 2047}} ++ __builtin_lsx_vst(_1, _2, var); // expected-error {{argument to '__builtin_lsx_vst' must be a constant integer}} ++} ++ ++v2i64 vldi(int var) { ++ v2i64 res = __builtin_lsx_vldi(-4097); // expected-error {{argument value -4097 is outside the valid range -4096, 4095}} ++ res |= __builtin_lsx_vldi(4096); // expected-error {{argument value 4096 is outside the valid range -4096, 4095}} ++ res |= __builtin_lsx_vldi(var); // expected-error {{argument to '__builtin_lsx_vldi' must be a constant integer}} ++ return res; ++} ++ ++v16i8 vrepli_b(int var) { ++ v16i8 res = __builtin_lsx_vrepli_b(-513); // expected-error {{argument value -513 is outside the valid range -512, 511}} ++ res |= __builtin_lsx_vrepli_b(512); // expected-error {{argument value 512 is outside the valid range -512, 511}} ++ res |= __builtin_lsx_vrepli_b(var); // expected-error {{argument to '__builtin_lsx_vrepli_b' must be a constant integer}} ++ return res; ++} ++ ++v2i64 vrepli_d(int var) { ++ v2i64 res = __builtin_lsx_vrepli_d(-513); // expected-error {{argument value -513 is outside the valid range -512, 511}} ++ res |= __builtin_lsx_vrepli_d(512); // expected-error {{argument value 512 is outside the valid range -512, 511}} ++ res |= __builtin_lsx_vrepli_d(var); // expected-error {{argument to '__builtin_lsx_vrepli_d' must be a constant integer}} ++ return res; ++} ++ ++v8i16 vrepli_h(int var) { ++ v8i16 res = __builtin_lsx_vrepli_h(-513); // expected-error {{argument value -513 is outside the valid range -512, 511}} ++ res |= __builtin_lsx_vrepli_h(512); // expected-error {{argument value 512 is outside the valid range -512, 511}} ++ res 
|= __builtin_lsx_vrepli_h(var); // expected-error {{argument to '__builtin_lsx_vrepli_h' must be a constant integer}} ++ return res; ++} ++ ++v4i32 vrepli_w(int var) { ++ v4i32 res = __builtin_lsx_vrepli_w(-513); // expected-error {{argument value -513 is outside the valid range -512, 511}} ++ res |= __builtin_lsx_vrepli_w(512); // expected-error {{argument value 512 is outside the valid range -512, 511}} ++ res |= __builtin_lsx_vrepli_w(var); // expected-error {{argument to '__builtin_lsx_vrepli_w' must be a constant integer}} ++ return res; ++} +diff --git a/clang/test/CodeGen/LoongArch/lsx/builtin.c b/clang/test/CodeGen/LoongArch/lsx/builtin.c +new file mode 100644 +index 000000000000..ef5a390e1838 +--- /dev/null ++++ b/clang/test/CodeGen/LoongArch/lsx/builtin.c +@@ -0,0 +1,5193 @@ ++// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py ++// RUN: %clang_cc1 -triple loongarch64 -target-feature +lsx -O2 -emit-llvm %s -o - | FileCheck %s ++ ++typedef signed char v16i8 __attribute__ ((vector_size(16), aligned(16))); ++typedef signed char v16i8_b __attribute__ ((vector_size(16), aligned(1))); ++typedef unsigned char v16u8 __attribute__ ((vector_size(16), aligned(16))); ++typedef unsigned char v16u8_b __attribute__ ((vector_size(16), aligned(1))); ++typedef short v8i16 __attribute__ ((vector_size(16), aligned(16))); ++typedef short v8i16_h __attribute__ ((vector_size(16), aligned(2))); ++typedef unsigned short v8u16 __attribute__ ((vector_size(16), aligned(16))); ++typedef unsigned short v8u16_h __attribute__ ((vector_size(16), aligned(2))); ++typedef int v4i32 __attribute__ ((vector_size(16), aligned(16))); ++typedef int v4i32_w __attribute__ ((vector_size(16), aligned(4))); ++typedef unsigned int v4u32 __attribute__ ((vector_size(16), aligned(16))); ++typedef unsigned int v4u32_w __attribute__ ((vector_size(16), aligned(4))); ++typedef long long v2i64 __attribute__ ((vector_size(16), aligned(16))); ++typedef long long v2i64_d __attribute__ ((vector_size(16), aligned(8))); ++typedef unsigned long long v2u64 __attribute__ ((vector_size(16), aligned(16))); ++typedef unsigned long long v2u64_d __attribute__ ((vector_size(16), aligned(8))); ++typedef float v4f32 __attribute__ ((vector_size(16), aligned(16))); ++typedef float v4f32_w __attribute__ ((vector_size(16), aligned(4))); ++typedef double v2f64 __attribute__ ((vector_size(16), aligned(16))); ++typedef double v2f64_d __attribute__ ((vector_size(16), aligned(8))); ++ ++typedef long long __m128i __attribute__ ((__vector_size__ (16), __may_alias__)); ++typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__)); ++typedef double __m128d __attribute__ ((__vector_size__ (16), __may_alias__)); ++ ++ ++// CHECK-LABEL: @vsll_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsll.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vsll_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vsll_b(_1, _2); } ++// CHECK-LABEL: @vsll_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsll.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vsll_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vsll_h(_1, _2); } ++// CHECK-LABEL: @vsll_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsll.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vsll_w(v4i32 _1, v4i32 _2) { return 
__builtin_lsx_vsll_w(_1, _2); } ++// CHECK-LABEL: @vsll_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsll.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vsll_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vsll_d(_1, _2); } ++// CHECK-LABEL: @vslli_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vslli.b(<16 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vslli_b(v16i8 _1) { return __builtin_lsx_vslli_b(_1, 1); } ++// CHECK-LABEL: @vslli_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vslli.h(<8 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vslli_h(v8i16 _1) { return __builtin_lsx_vslli_h(_1, 1); } ++// CHECK-LABEL: @vslli_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vslli.w(<4 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vslli_w(v4i32 _1) { return __builtin_lsx_vslli_w(_1, 1); } ++// CHECK-LABEL: @vslli_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vslli.d(<2 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vslli_d(v2i64 _1) { return __builtin_lsx_vslli_d(_1, 1); } ++// CHECK-LABEL: @vsra_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsra.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vsra_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vsra_b(_1, _2); } ++// CHECK-LABEL: @vsra_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsra.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vsra_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vsra_h(_1, _2); } ++// CHECK-LABEL: @vsra_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsra.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vsra_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vsra_w(_1, _2); } ++// CHECK-LABEL: @vsra_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsra.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vsra_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vsra_d(_1, _2); } ++// CHECK-LABEL: @vsrai_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsrai.b(<16 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vsrai_b(v16i8 _1) { return __builtin_lsx_vsrai_b(_1, 1); } ++// CHECK-LABEL: @vsrai_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsrai.h(<8 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vsrai_h(v8i16 _1) { return __builtin_lsx_vsrai_h(_1, 1); } ++// CHECK-LABEL: @vsrai_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsrai.w(<4 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vsrai_w(v4i32 _1) { return __builtin_lsx_vsrai_w(_1, 1); } ++// CHECK-LABEL: @vsrai_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsrai.d(<2 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vsrai_d(v2i64 _1) { return __builtin_lsx_vsrai_d(_1, 1); } ++// CHECK-LABEL: @vsrar_b( ++// 
CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsrar.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vsrar_b(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vsrar_b(_1, _2); ++} ++// CHECK-LABEL: @vsrar_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsrar.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vsrar_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vsrar_h(_1, _2); ++} ++// CHECK-LABEL: @vsrar_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsrar.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vsrar_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vsrar_w(_1, _2); ++} ++// CHECK-LABEL: @vsrar_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsrar.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vsrar_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vsrar_d(_1, _2); ++} ++// CHECK-LABEL: @vsrari_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsrari.b(<16 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vsrari_b(v16i8 _1) { return __builtin_lsx_vsrari_b(_1, 1); } ++// CHECK-LABEL: @vsrari_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsrari.h(<8 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vsrari_h(v8i16 _1) { return __builtin_lsx_vsrari_h(_1, 1); } ++// CHECK-LABEL: @vsrari_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsrari.w(<4 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vsrari_w(v4i32 _1) { return __builtin_lsx_vsrari_w(_1, 1); } ++// CHECK-LABEL: @vsrari_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsrari.d(<2 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vsrari_d(v2i64 _1) { return __builtin_lsx_vsrari_d(_1, 1); } ++// CHECK-LABEL: @vsrl_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsrl.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vsrl_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vsrl_b(_1, _2); } ++// CHECK-LABEL: @vsrl_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsrl.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vsrl_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vsrl_h(_1, _2); } ++// CHECK-LABEL: @vsrl_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsrl.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vsrl_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vsrl_w(_1, _2); } ++// CHECK-LABEL: @vsrl_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsrl.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vsrl_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vsrl_d(_1, _2); } ++// CHECK-LABEL: @vsrli_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsrli.b(<16 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vsrli_b(v16i8 _1) { return __builtin_lsx_vsrli_b(_1, 1); 
} ++// CHECK-LABEL: @vsrli_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsrli.h(<8 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vsrli_h(v8i16 _1) { return __builtin_lsx_vsrli_h(_1, 1); } ++// CHECK-LABEL: @vsrli_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsrli.w(<4 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vsrli_w(v4i32 _1) { return __builtin_lsx_vsrli_w(_1, 1); } ++// CHECK-LABEL: @vsrli_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsrli.d(<2 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vsrli_d(v2i64 _1) { return __builtin_lsx_vsrli_d(_1, 1); } ++// CHECK-LABEL: @vsrlr_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsrlr.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vsrlr_b(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vsrlr_b(_1, _2); ++} ++// CHECK-LABEL: @vsrlr_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsrlr.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vsrlr_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vsrlr_h(_1, _2); ++} ++// CHECK-LABEL: @vsrlr_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsrlr.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vsrlr_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vsrlr_w(_1, _2); ++} ++// CHECK-LABEL: @vsrlr_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsrlr.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vsrlr_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vsrlr_d(_1, _2); ++} ++// CHECK-LABEL: @vsrlri_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsrlri.b(<16 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vsrlri_b(v16i8 _1) { return __builtin_lsx_vsrlri_b(_1, 1); } ++// CHECK-LABEL: @vsrlri_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsrlri.h(<8 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vsrlri_h(v8i16 _1) { return __builtin_lsx_vsrlri_h(_1, 1); } ++// CHECK-LABEL: @vsrlri_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsrlri.w(<4 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vsrlri_w(v4i32 _1) { return __builtin_lsx_vsrlri_w(_1, 1); } ++// CHECK-LABEL: @vsrlri_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsrlri.d(<2 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vsrlri_d(v2i64 _1) { return __builtin_lsx_vsrlri_d(_1, 1); } ++// CHECK-LABEL: @vbitclr_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vbitclr.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16u8 vbitclr_b(v16u8 _1, v16u8 _2) { ++ return __builtin_lsx_vbitclr_b(_1, _2); ++} ++// CHECK-LABEL: @vbitclr_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vbitclr.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8u16 vbitclr_h(v8u16 _1, v8u16 _2) { ++ return 
__builtin_lsx_vbitclr_h(_1, _2); ++} ++// CHECK-LABEL: @vbitclr_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vbitclr.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4u32 vbitclr_w(v4u32 _1, v4u32 _2) { ++ return __builtin_lsx_vbitclr_w(_1, _2); ++} ++// CHECK-LABEL: @vbitclr_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vbitclr.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2u64 vbitclr_d(v2u64 _1, v2u64 _2) { ++ return __builtin_lsx_vbitclr_d(_1, _2); ++} ++// CHECK-LABEL: @vbitclri_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vbitclri.b(<16 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16u8 vbitclri_b(v16u8 _1) { return __builtin_lsx_vbitclri_b(_1, 1); } ++// CHECK-LABEL: @vbitclri_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vbitclri.h(<8 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8u16 vbitclri_h(v8u16 _1) { return __builtin_lsx_vbitclri_h(_1, 1); } ++// CHECK-LABEL: @vbitclri_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vbitclri.w(<4 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4u32 vbitclri_w(v4u32 _1) { return __builtin_lsx_vbitclri_w(_1, 1); } ++// CHECK-LABEL: @vbitclri_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vbitclri.d(<2 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2u64 vbitclri_d(v2u64 _1) { return __builtin_lsx_vbitclri_d(_1, 1); } ++// CHECK-LABEL: @vbitset_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vbitset.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16u8 vbitset_b(v16u8 _1, v16u8 _2) { ++ return __builtin_lsx_vbitset_b(_1, _2); ++} ++// CHECK-LABEL: @vbitset_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vbitset.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8u16 vbitset_h(v8u16 _1, v8u16 _2) { ++ return __builtin_lsx_vbitset_h(_1, _2); ++} ++// CHECK-LABEL: @vbitset_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vbitset.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4u32 vbitset_w(v4u32 _1, v4u32 _2) { ++ return __builtin_lsx_vbitset_w(_1, _2); ++} ++// CHECK-LABEL: @vbitset_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vbitset.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2u64 vbitset_d(v2u64 _1, v2u64 _2) { ++ return __builtin_lsx_vbitset_d(_1, _2); ++} ++// CHECK-LABEL: @vbitseti_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vbitseti.b(<16 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16u8 vbitseti_b(v16u8 _1) { return __builtin_lsx_vbitseti_b(_1, 1); } ++// CHECK-LABEL: @vbitseti_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vbitseti.h(<8 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8u16 vbitseti_h(v8u16 _1) { return __builtin_lsx_vbitseti_h(_1, 1); } ++// CHECK-LABEL: @vbitseti_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> 
@llvm.loongarch.lsx.vbitseti.w(<4 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4u32 vbitseti_w(v4u32 _1) { return __builtin_lsx_vbitseti_w(_1, 1); } ++// CHECK-LABEL: @vbitseti_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vbitseti.d(<2 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2u64 vbitseti_d(v2u64 _1) { return __builtin_lsx_vbitseti_d(_1, 1); } ++// CHECK-LABEL: @vbitrev_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vbitrev.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16u8 vbitrev_b(v16u8 _1, v16u8 _2) { ++ return __builtin_lsx_vbitrev_b(_1, _2); ++} ++// CHECK-LABEL: @vbitrev_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vbitrev.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8u16 vbitrev_h(v8u16 _1, v8u16 _2) { ++ return __builtin_lsx_vbitrev_h(_1, _2); ++} ++// CHECK-LABEL: @vbitrev_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vbitrev.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4u32 vbitrev_w(v4u32 _1, v4u32 _2) { ++ return __builtin_lsx_vbitrev_w(_1, _2); ++} ++// CHECK-LABEL: @vbitrev_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vbitrev.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2u64 vbitrev_d(v2u64 _1, v2u64 _2) { ++ return __builtin_lsx_vbitrev_d(_1, _2); ++} ++// CHECK-LABEL: @vbitrevi_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vbitrevi.b(<16 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16u8 vbitrevi_b(v16u8 _1) { return __builtin_lsx_vbitrevi_b(_1, 1); } ++// CHECK-LABEL: @vbitrevi_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vbitrevi.h(<8 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8u16 vbitrevi_h(v8u16 _1) { return __builtin_lsx_vbitrevi_h(_1, 1); } ++// CHECK-LABEL: @vbitrevi_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vbitrevi.w(<4 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4u32 vbitrevi_w(v4u32 _1) { return __builtin_lsx_vbitrevi_w(_1, 1); } ++// CHECK-LABEL: @vbitrevi_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vbitrevi.d(<2 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2u64 vbitrevi_d(v2u64 _1) { return __builtin_lsx_vbitrevi_d(_1, 1); } ++// CHECK-LABEL: @vadd_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vadd.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vadd_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vadd_b(_1, _2); } ++// CHECK-LABEL: @vadd_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vadd.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vadd_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vadd_h(_1, _2); } ++// CHECK-LABEL: @vadd_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vadd.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vadd_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vadd_w(_1, _2); 
} ++// CHECK-LABEL: @vadd_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vadd.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vadd_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vadd_d(_1, _2); } ++// CHECK-LABEL: @vaddi_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vaddi.bu(<16 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vaddi_bu(v16i8 _1) { return __builtin_lsx_vaddi_bu(_1, 1); } ++// CHECK-LABEL: @vaddi_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vaddi.hu(<8 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vaddi_hu(v8i16 _1) { return __builtin_lsx_vaddi_hu(_1, 1); } ++// CHECK-LABEL: @vaddi_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vaddi.wu(<4 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vaddi_wu(v4i32 _1) { return __builtin_lsx_vaddi_wu(_1, 1); } ++// CHECK-LABEL: @vaddi_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vaddi.du(<2 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vaddi_du(v2i64 _1) { return __builtin_lsx_vaddi_du(_1, 1); } ++// CHECK-LABEL: @vsub_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsub.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vsub_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vsub_b(_1, _2); } ++// CHECK-LABEL: @vsub_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsub.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vsub_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vsub_h(_1, _2); } ++// CHECK-LABEL: @vsub_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsub.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vsub_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vsub_w(_1, _2); } ++// CHECK-LABEL: @vsub_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsub.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vsub_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vsub_d(_1, _2); } ++// CHECK-LABEL: @vsubi_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsubi.bu(<16 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vsubi_bu(v16i8 _1) { return __builtin_lsx_vsubi_bu(_1, 1); } ++// CHECK-LABEL: @vsubi_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsubi.hu(<8 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vsubi_hu(v8i16 _1) { return __builtin_lsx_vsubi_hu(_1, 1); } ++// CHECK-LABEL: @vsubi_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsubi.wu(<4 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vsubi_wu(v4i32 _1) { return __builtin_lsx_vsubi_wu(_1, 1); } ++// CHECK-LABEL: @vsubi_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsubi.du(<2 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vsubi_du(v2i64 _1) { return __builtin_lsx_vsubi_du(_1, 1); } ++// CHECK-LABEL: @vmax_b( ++// 
CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmax.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vmax_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vmax_b(_1, _2); } ++// CHECK-LABEL: @vmax_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmax.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vmax_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vmax_h(_1, _2); } ++// CHECK-LABEL: @vmax_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmax.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vmax_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vmax_w(_1, _2); } ++// CHECK-LABEL: @vmax_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmax.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vmax_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vmax_d(_1, _2); } ++// CHECK-LABEL: @vmaxi_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmaxi.b(<16 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vmaxi_b(v16i8 _1) { return __builtin_lsx_vmaxi_b(_1, 1); } ++// CHECK-LABEL: @vmaxi_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmaxi.h(<8 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vmaxi_h(v8i16 _1) { return __builtin_lsx_vmaxi_h(_1, 1); } ++// CHECK-LABEL: @vmaxi_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmaxi.w(<4 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vmaxi_w(v4i32 _1) { return __builtin_lsx_vmaxi_w(_1, 1); } ++// CHECK-LABEL: @vmaxi_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmaxi.d(<2 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vmaxi_d(v2i64 _1) { return __builtin_lsx_vmaxi_d(_1, 1); } ++// CHECK-LABEL: @vmax_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmax.bu(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16u8 vmax_bu(v16u8 _1, v16u8 _2) { ++ return __builtin_lsx_vmax_bu(_1, _2); ++} ++// CHECK-LABEL: @vmax_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmax.hu(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8u16 vmax_hu(v8u16 _1, v8u16 _2) { ++ return __builtin_lsx_vmax_hu(_1, _2); ++} ++// CHECK-LABEL: @vmax_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmax.wu(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4u32 vmax_wu(v4u32 _1, v4u32 _2) { ++ return __builtin_lsx_vmax_wu(_1, _2); ++} ++// CHECK-LABEL: @vmax_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmax.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2u64 vmax_du(v2u64 _1, v2u64 _2) { ++ return __builtin_lsx_vmax_du(_1, _2); ++} ++// CHECK-LABEL: @vmaxi_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmaxi.bu(<16 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16u8 vmaxi_bu(v16u8 _1) { return __builtin_lsx_vmaxi_bu(_1, 1); } ++// 
CHECK-LABEL: @vmaxi_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmaxi.hu(<8 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8u16 vmaxi_hu(v8u16 _1) { return __builtin_lsx_vmaxi_hu(_1, 1); } ++// CHECK-LABEL: @vmaxi_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmaxi.wu(<4 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4u32 vmaxi_wu(v4u32 _1) { return __builtin_lsx_vmaxi_wu(_1, 1); } ++// CHECK-LABEL: @vmaxi_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmaxi.du(<2 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2u64 vmaxi_du(v2u64 _1) { return __builtin_lsx_vmaxi_du(_1, 1); } ++// CHECK-LABEL: @vmin_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmin.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vmin_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vmin_b(_1, _2); } ++// CHECK-LABEL: @vmin_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmin.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vmin_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vmin_h(_1, _2); } ++// CHECK-LABEL: @vmin_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmin.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vmin_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vmin_w(_1, _2); } ++// CHECK-LABEL: @vmin_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmin.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vmin_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vmin_d(_1, _2); } ++// CHECK-LABEL: @vmini_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmini.b(<16 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vmini_b(v16i8 _1) { return __builtin_lsx_vmini_b(_1, 1); } ++// CHECK-LABEL: @vmini_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmini.h(<8 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vmini_h(v8i16 _1) { return __builtin_lsx_vmini_h(_1, 1); } ++// CHECK-LABEL: @vmini_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmini.w(<4 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vmini_w(v4i32 _1) { return __builtin_lsx_vmini_w(_1, 1); } ++// CHECK-LABEL: @vmini_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmini.d(<2 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vmini_d(v2i64 _1) { return __builtin_lsx_vmini_d(_1, 1); } ++// CHECK-LABEL: @vmin_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmin.bu(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16u8 vmin_bu(v16u8 _1, v16u8 _2) { ++ return __builtin_lsx_vmin_bu(_1, _2); ++} ++// CHECK-LABEL: @vmin_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmin.hu(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8u16 vmin_hu(v8u16 _1, v8u16 _2) { ++ return __builtin_lsx_vmin_hu(_1, _2); ++} ++// CHECK-LABEL: @vmin_wu( 
++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmin.wu(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4u32 vmin_wu(v4u32 _1, v4u32 _2) { ++ return __builtin_lsx_vmin_wu(_1, _2); ++} ++// CHECK-LABEL: @vmin_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmin.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2u64 vmin_du(v2u64 _1, v2u64 _2) { ++ return __builtin_lsx_vmin_du(_1, _2); ++} ++// CHECK-LABEL: @vmini_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmini.bu(<16 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16u8 vmini_bu(v16u8 _1) { return __builtin_lsx_vmini_bu(_1, 1); } ++// CHECK-LABEL: @vmini_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmini.hu(<8 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8u16 vmini_hu(v8u16 _1) { return __builtin_lsx_vmini_hu(_1, 1); } ++// CHECK-LABEL: @vmini_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmini.wu(<4 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4u32 vmini_wu(v4u32 _1) { return __builtin_lsx_vmini_wu(_1, 1); } ++// CHECK-LABEL: @vmini_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmini.du(<2 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2u64 vmini_du(v2u64 _1) { return __builtin_lsx_vmini_du(_1, 1); } ++// CHECK-LABEL: @vseq_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vseq.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vseq_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vseq_b(_1, _2); } ++// CHECK-LABEL: @vseq_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vseq.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vseq_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vseq_h(_1, _2); } ++// CHECK-LABEL: @vseq_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vseq.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vseq_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vseq_w(_1, _2); } ++// CHECK-LABEL: @vseq_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vseq.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vseq_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vseq_d(_1, _2); } ++// CHECK-LABEL: @vseqi_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vseqi.b(<16 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vseqi_b(v16i8 _1) { return __builtin_lsx_vseqi_b(_1, 1); } ++// CHECK-LABEL: @vseqi_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vseqi.h(<8 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vseqi_h(v8i16 _1) { return __builtin_lsx_vseqi_h(_1, 1); } ++// CHECK-LABEL: @vseqi_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vseqi.w(<4 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vseqi_w(v4i32 _1) { return __builtin_lsx_vseqi_w(_1, 1); } ++// CHECK-LABEL: @vseqi_d( ++// CHECK-NEXT: 
entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vseqi.d(<2 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vseqi_d(v2i64 _1) { return __builtin_lsx_vseqi_d(_1, 1); } ++// CHECK-LABEL: @vslti_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vslti.b(<16 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vslti_b(v16i8 _1) { return __builtin_lsx_vslti_b(_1, 1); } ++// CHECK-LABEL: @vslt_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vslt.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vslt_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vslt_b(_1, _2); } ++// CHECK-LABEL: @vslt_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vslt.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vslt_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vslt_h(_1, _2); } ++// CHECK-LABEL: @vslt_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vslt.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vslt_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vslt_w(_1, _2); } ++// CHECK-LABEL: @vslt_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vslt.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vslt_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vslt_d(_1, _2); } ++// CHECK-LABEL: @vslti_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vslti.h(<8 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vslti_h(v8i16 _1) { return __builtin_lsx_vslti_h(_1, 1); } ++// CHECK-LABEL: @vslti_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vslti.w(<4 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vslti_w(v4i32 _1) { return __builtin_lsx_vslti_w(_1, 1); } ++// CHECK-LABEL: @vslti_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vslti.d(<2 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vslti_d(v2i64 _1) { return __builtin_lsx_vslti_d(_1, 1); } ++// CHECK-LABEL: @vslt_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vslt.bu(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vslt_bu(v16u8 _1, v16u8 _2) { ++ return __builtin_lsx_vslt_bu(_1, _2); ++} ++// CHECK-LABEL: @vslt_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vslt.hu(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vslt_hu(v8u16 _1, v8u16 _2) { ++ return __builtin_lsx_vslt_hu(_1, _2); ++} ++// CHECK-LABEL: @vslt_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vslt.wu(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vslt_wu(v4u32 _1, v4u32 _2) { ++ return __builtin_lsx_vslt_wu(_1, _2); ++} ++// CHECK-LABEL: @vslt_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vslt.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vslt_du(v2u64 _1, v2u64 _2) { ++ return __builtin_lsx_vslt_du(_1, _2); ++} ++// CHECK-LABEL: 
@vslti_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vslti.bu(<16 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vslti_bu(v16u8 _1) { return __builtin_lsx_vslti_bu(_1, 1); } ++// CHECK-LABEL: @vslti_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vslti.hu(<8 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vslti_hu(v8u16 _1) { return __builtin_lsx_vslti_hu(_1, 1); } ++// CHECK-LABEL: @vslti_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vslti.wu(<4 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vslti_wu(v4u32 _1) { return __builtin_lsx_vslti_wu(_1, 1); } ++// CHECK-LABEL: @vslti_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vslti.du(<2 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vslti_du(v2u64 _1) { return __builtin_lsx_vslti_du(_1, 1); } ++// CHECK-LABEL: @vsle_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsle.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vsle_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vsle_b(_1, _2); } ++// CHECK-LABEL: @vsle_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsle.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vsle_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vsle_h(_1, _2); } ++// CHECK-LABEL: @vsle_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsle.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vsle_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vsle_w(_1, _2); } ++// CHECK-LABEL: @vsle_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsle.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vsle_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vsle_d(_1, _2); } ++// CHECK-LABEL: @vslei_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vslei.b(<16 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vslei_b(v16i8 _1) { return __builtin_lsx_vslei_b(_1, 1); } ++// CHECK-LABEL: @vslei_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vslei.h(<8 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vslei_h(v8i16 _1) { return __builtin_lsx_vslei_h(_1, 1); } ++// CHECK-LABEL: @vslei_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vslei.w(<4 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vslei_w(v4i32 _1) { return __builtin_lsx_vslei_w(_1, 1); } ++// CHECK-LABEL: @vslei_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vslei.d(<2 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vslei_d(v2i64 _1) { return __builtin_lsx_vslei_d(_1, 1); } ++// CHECK-LABEL: @vsle_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsle.bu(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vsle_bu(v16u8 _1, v16u8 _2) { ++ return __builtin_lsx_vsle_bu(_1, _2); ++} ++// CHECK-LABEL: @vsle_hu( ++// CHECK-NEXT: entry: ++// 
CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsle.hu(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vsle_hu(v8u16 _1, v8u16 _2) { ++ return __builtin_lsx_vsle_hu(_1, _2); ++} ++// CHECK-LABEL: @vsle_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsle.wu(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vsle_wu(v4u32 _1, v4u32 _2) { ++ return __builtin_lsx_vsle_wu(_1, _2); ++} ++// CHECK-LABEL: @vsle_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsle.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vsle_du(v2u64 _1, v2u64 _2) { ++ return __builtin_lsx_vsle_du(_1, _2); ++} ++// CHECK-LABEL: @vslei_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vslei.bu(<16 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vslei_bu(v16u8 _1) { return __builtin_lsx_vslei_bu(_1, 1); } ++// CHECK-LABEL: @vslei_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vslei.hu(<8 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vslei_hu(v8u16 _1) { return __builtin_lsx_vslei_hu(_1, 1); } ++// CHECK-LABEL: @vslei_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vslei.wu(<4 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vslei_wu(v4u32 _1) { return __builtin_lsx_vslei_wu(_1, 1); } ++// CHECK-LABEL: @vslei_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vslei.du(<2 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vslei_du(v2u64 _1) { return __builtin_lsx_vslei_du(_1, 1); } ++// CHECK-LABEL: @vsat_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsat.b(<16 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vsat_b(v16i8 _1) { return __builtin_lsx_vsat_b(_1, 1); } ++// CHECK-LABEL: @vsat_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsat.h(<8 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vsat_h(v8i16 _1) { return __builtin_lsx_vsat_h(_1, 1); } ++// CHECK-LABEL: @vsat_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsat.w(<4 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vsat_w(v4i32 _1) { return __builtin_lsx_vsat_w(_1, 1); } ++// CHECK-LABEL: @vsat_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsat.d(<2 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vsat_d(v2i64 _1) { return __builtin_lsx_vsat_d(_1, 1); } ++// CHECK-LABEL: @vsat_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsat.bu(<16 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16u8 vsat_bu(v16u8 _1) { return __builtin_lsx_vsat_bu(_1, 1); } ++// CHECK-LABEL: @vsat_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsat.hu(<8 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8u16 vsat_hu(v8u16 _1) { return __builtin_lsx_vsat_hu(_1, 1); } ++// CHECK-LABEL: @vsat_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsat.wu(<4 x i32> 
_1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4u32 vsat_wu(v4u32 _1) { return __builtin_lsx_vsat_wu(_1, 1); } ++// CHECK-LABEL: @vsat_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsat.du(<2 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2u64 vsat_du(v2u64 _1) { return __builtin_lsx_vsat_du(_1, 1); } ++// CHECK-LABEL: @vadda_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vadda.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vadda_b(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vadda_b(_1, _2); ++} ++// CHECK-LABEL: @vadda_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vadda.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vadda_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vadda_h(_1, _2); ++} ++// CHECK-LABEL: @vadda_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vadda.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vadda_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vadda_w(_1, _2); ++} ++// CHECK-LABEL: @vadda_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vadda.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vadda_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vadda_d(_1, _2); ++} ++// CHECK-LABEL: @vsadd_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsadd.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vsadd_b(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vsadd_b(_1, _2); ++} ++// CHECK-LABEL: @vsadd_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsadd.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vsadd_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vsadd_h(_1, _2); ++} ++// CHECK-LABEL: @vsadd_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsadd.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vsadd_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vsadd_w(_1, _2); ++} ++// CHECK-LABEL: @vsadd_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsadd.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vsadd_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vsadd_d(_1, _2); ++} ++// CHECK-LABEL: @vsadd_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsadd.bu(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16u8 vsadd_bu(v16u8 _1, v16u8 _2) { ++ return __builtin_lsx_vsadd_bu(_1, _2); ++} ++// CHECK-LABEL: @vsadd_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsadd.hu(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8u16 vsadd_hu(v8u16 _1, v8u16 _2) { ++ return __builtin_lsx_vsadd_hu(_1, _2); ++} ++// CHECK-LABEL: @vsadd_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsadd.wu(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4u32 vsadd_wu(v4u32 _1, v4u32 _2) { ++ return __builtin_lsx_vsadd_wu(_1, 
_2); ++} ++// CHECK-LABEL: @vsadd_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsadd.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2u64 vsadd_du(v2u64 _1, v2u64 _2) { ++ return __builtin_lsx_vsadd_du(_1, _2); ++} ++// CHECK-LABEL: @vavg_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vavg.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vavg_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vavg_b(_1, _2); } ++// CHECK-LABEL: @vavg_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vavg.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vavg_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vavg_h(_1, _2); } ++// CHECK-LABEL: @vavg_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vavg.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vavg_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vavg_w(_1, _2); } ++// CHECK-LABEL: @vavg_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vavg.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vavg_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vavg_d(_1, _2); } ++// CHECK-LABEL: @vavg_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vavg.bu(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16u8 vavg_bu(v16u8 _1, v16u8 _2) { ++ return __builtin_lsx_vavg_bu(_1, _2); ++} ++// CHECK-LABEL: @vavg_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vavg.hu(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8u16 vavg_hu(v8u16 _1, v8u16 _2) { ++ return __builtin_lsx_vavg_hu(_1, _2); ++} ++// CHECK-LABEL: @vavg_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vavg.wu(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4u32 vavg_wu(v4u32 _1, v4u32 _2) { ++ return __builtin_lsx_vavg_wu(_1, _2); ++} ++// CHECK-LABEL: @vavg_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vavg.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2u64 vavg_du(v2u64 _1, v2u64 _2) { ++ return __builtin_lsx_vavg_du(_1, _2); ++} ++// CHECK-LABEL: @vavgr_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vavgr.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vavgr_b(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vavgr_b(_1, _2); ++} ++// CHECK-LABEL: @vavgr_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vavgr.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vavgr_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vavgr_h(_1, _2); ++} ++// CHECK-LABEL: @vavgr_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vavgr.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vavgr_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vavgr_w(_1, _2); ++} ++// CHECK-LABEL: @vavgr_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> 
@llvm.loongarch.lsx.vavgr.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vavgr_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vavgr_d(_1, _2); ++} ++// CHECK-LABEL: @vavgr_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vavgr.bu(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16u8 vavgr_bu(v16u8 _1, v16u8 _2) { ++ return __builtin_lsx_vavgr_bu(_1, _2); ++} ++// CHECK-LABEL: @vavgr_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vavgr.hu(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8u16 vavgr_hu(v8u16 _1, v8u16 _2) { ++ return __builtin_lsx_vavgr_hu(_1, _2); ++} ++// CHECK-LABEL: @vavgr_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vavgr.wu(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4u32 vavgr_wu(v4u32 _1, v4u32 _2) { ++ return __builtin_lsx_vavgr_wu(_1, _2); ++} ++// CHECK-LABEL: @vavgr_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vavgr.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2u64 vavgr_du(v2u64 _1, v2u64 _2) { ++ return __builtin_lsx_vavgr_du(_1, _2); ++} ++// CHECK-LABEL: @vssub_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssub.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vssub_b(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vssub_b(_1, _2); ++} ++// CHECK-LABEL: @vssub_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssub.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vssub_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vssub_h(_1, _2); ++} ++// CHECK-LABEL: @vssub_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssub.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vssub_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vssub_w(_1, _2); ++} ++// CHECK-LABEL: @vssub_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vssub.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vssub_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vssub_d(_1, _2); ++} ++// CHECK-LABEL: @vssub_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssub.bu(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16u8 vssub_bu(v16u8 _1, v16u8 _2) { ++ return __builtin_lsx_vssub_bu(_1, _2); ++} ++// CHECK-LABEL: @vssub_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssub.hu(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8u16 vssub_hu(v8u16 _1, v8u16 _2) { ++ return __builtin_lsx_vssub_hu(_1, _2); ++} ++// CHECK-LABEL: @vssub_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssub.wu(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4u32 vssub_wu(v4u32 _1, v4u32 _2) { ++ return __builtin_lsx_vssub_wu(_1, _2); ++} ++// CHECK-LABEL: @vssub_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vssub.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// 
CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2u64 vssub_du(v2u64 _1, v2u64 _2) { ++ return __builtin_lsx_vssub_du(_1, _2); ++} ++// CHECK-LABEL: @vabsd_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vabsd.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vabsd_b(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vabsd_b(_1, _2); ++} ++// CHECK-LABEL: @vabsd_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vabsd.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vabsd_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vabsd_h(_1, _2); ++} ++// CHECK-LABEL: @vabsd_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vabsd.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vabsd_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vabsd_w(_1, _2); ++} ++// CHECK-LABEL: @vabsd_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vabsd.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vabsd_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vabsd_d(_1, _2); ++} ++// CHECK-LABEL: @vabsd_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vabsd.bu(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16u8 vabsd_bu(v16u8 _1, v16u8 _2) { ++ return __builtin_lsx_vabsd_bu(_1, _2); ++} ++// CHECK-LABEL: @vabsd_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vabsd.hu(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8u16 vabsd_hu(v8u16 _1, v8u16 _2) { ++ return __builtin_lsx_vabsd_hu(_1, _2); ++} ++// CHECK-LABEL: @vabsd_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vabsd.wu(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4u32 vabsd_wu(v4u32 _1, v4u32 _2) { ++ return __builtin_lsx_vabsd_wu(_1, _2); ++} ++// CHECK-LABEL: @vabsd_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vabsd.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2u64 vabsd_du(v2u64 _1, v2u64 _2) { ++ return __builtin_lsx_vabsd_du(_1, _2); ++} ++// CHECK-LABEL: @vmul_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmul.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vmul_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vmul_b(_1, _2); } ++// CHECK-LABEL: @vmul_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmul.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vmul_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vmul_h(_1, _2); } ++// CHECK-LABEL: @vmul_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmul.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vmul_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vmul_w(_1, _2); } ++// CHECK-LABEL: @vmul_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmul.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vmul_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vmul_d(_1, _2); 
} ++// CHECK-LABEL: @vmadd_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmadd.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*, <16 x i8> _3:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vmadd_b(v16i8 _1, v16i8 _2, v16i8 _3) { ++ return __builtin_lsx_vmadd_b(_1, _2, _3); ++} ++// CHECK-LABEL: @vmadd_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmadd.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*, <8 x i16> _3:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vmadd_h(v8i16 _1, v8i16 _2, v8i16 _3) { ++ return __builtin_lsx_vmadd_h(_1, _2, _3); ++} ++// CHECK-LABEL: @vmadd_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmadd.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*, <4 x i32> _3:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vmadd_w(v4i32 _1, v4i32 _2, v4i32 _3) { ++ return __builtin_lsx_vmadd_w(_1, _2, _3); ++} ++// CHECK-LABEL: @vmadd_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmadd.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*, <2 x i64> _3:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vmadd_d(v2i64 _1, v2i64 _2, v2i64 _3) { ++ return __builtin_lsx_vmadd_d(_1, _2, _3); ++} ++// CHECK-LABEL: @vmsub_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmsub.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*, <16 x i8> _3:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vmsub_b(v16i8 _1, v16i8 _2, v16i8 _3) { ++ return __builtin_lsx_vmsub_b(_1, _2, _3); ++} ++// CHECK-LABEL: @vmsub_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmsub.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*, <8 x i16> _3:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vmsub_h(v8i16 _1, v8i16 _2, v8i16 _3) { ++ return __builtin_lsx_vmsub_h(_1, _2, _3); ++} ++// CHECK-LABEL: @vmsub_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmsub.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*, <4 x i32> _3:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vmsub_w(v4i32 _1, v4i32 _2, v4i32 _3) { ++ return __builtin_lsx_vmsub_w(_1, _2, _3); ++} ++// CHECK-LABEL: @vmsub_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmsub.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*, <2 x i64> _3:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vmsub_d(v2i64 _1, v2i64 _2, v2i64 _3) { ++ return __builtin_lsx_vmsub_d(_1, _2, _3); ++} ++// CHECK-LABEL: @vdiv_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vdiv.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vdiv_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vdiv_b(_1, _2); } ++// CHECK-LABEL: @vdiv_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vdiv.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vdiv_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vdiv_h(_1, _2); } ++// CHECK-LABEL: @vdiv_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vdiv.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vdiv_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vdiv_w(_1, _2); } ++// CHECK-LABEL: @vdiv_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vdiv.d(<2 x i64> _1:%.*, <2 x i64> 
_2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vdiv_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vdiv_d(_1, _2); } ++// CHECK-LABEL: @vdiv_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vdiv.bu(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16u8 vdiv_bu(v16u8 _1, v16u8 _2) { ++ return __builtin_lsx_vdiv_bu(_1, _2); ++} ++// CHECK-LABEL: @vdiv_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vdiv.hu(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8u16 vdiv_hu(v8u16 _1, v8u16 _2) { ++ return __builtin_lsx_vdiv_hu(_1, _2); ++} ++// CHECK-LABEL: @vdiv_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vdiv.wu(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4u32 vdiv_wu(v4u32 _1, v4u32 _2) { ++ return __builtin_lsx_vdiv_wu(_1, _2); ++} ++// CHECK-LABEL: @vdiv_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vdiv.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2u64 vdiv_du(v2u64 _1, v2u64 _2) { ++ return __builtin_lsx_vdiv_du(_1, _2); ++} ++// CHECK-LABEL: @vhaddw_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vhaddw.h.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vhaddw_h_b(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vhaddw_h_b(_1, _2); ++} ++// CHECK-LABEL: @vhaddw_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vhaddw.w.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vhaddw_w_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vhaddw_w_h(_1, _2); ++} ++// CHECK-LABEL: @vhaddw_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.d.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vhaddw_d_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vhaddw_d_w(_1, _2); ++} ++// CHECK-LABEL: @vhaddw_hu_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vhaddw.hu.bu(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8u16 vhaddw_hu_bu(v16u8 _1, v16u8 _2) { ++ return __builtin_lsx_vhaddw_hu_bu(_1, _2); ++} ++// CHECK-LABEL: @vhaddw_wu_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vhaddw.wu.hu(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4u32 vhaddw_wu_hu(v8u16 _1, v8u16 _2) { ++ return __builtin_lsx_vhaddw_wu_hu(_1, _2); ++} ++// CHECK-LABEL: @vhaddw_du_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.du.wu(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2u64 vhaddw_du_wu(v4u32 _1, v4u32 _2) { ++ return __builtin_lsx_vhaddw_du_wu(_1, _2); ++} ++// CHECK-LABEL: @vhsubw_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vhsubw.h.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vhsubw_h_b(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vhsubw_h_b(_1, _2); ++} ++// CHECK-LABEL: @vhsubw_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vhsubw.w.h(<8 x i16> _1:%.*, 
<8 x i16> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vhsubw_w_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vhsubw_w_h(_1, _2); ++} ++// CHECK-LABEL: @vhsubw_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.d.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vhsubw_d_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vhsubw_d_w(_1, _2); ++} ++// CHECK-LABEL: @vhsubw_hu_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vhsubw.hu.bu(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vhsubw_hu_bu(v16u8 _1, v16u8 _2) { ++ return __builtin_lsx_vhsubw_hu_bu(_1, _2); ++} ++// CHECK-LABEL: @vhsubw_wu_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vhsubw.wu.hu(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vhsubw_wu_hu(v8u16 _1, v8u16 _2) { ++ return __builtin_lsx_vhsubw_wu_hu(_1, _2); ++} ++// CHECK-LABEL: @vhsubw_du_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.du.wu(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vhsubw_du_wu(v4u32 _1, v4u32 _2) { ++ return __builtin_lsx_vhsubw_du_wu(_1, _2); ++} ++// CHECK-LABEL: @vmod_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmod.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vmod_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vmod_b(_1, _2); } ++// CHECK-LABEL: @vmod_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmod.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vmod_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vmod_h(_1, _2); } ++// CHECK-LABEL: @vmod_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmod.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vmod_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vmod_w(_1, _2); } ++// CHECK-LABEL: @vmod_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmod.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vmod_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vmod_d(_1, _2); } ++// CHECK-LABEL: @vmod_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmod.bu(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16u8 vmod_bu(v16u8 _1, v16u8 _2) { ++ return __builtin_lsx_vmod_bu(_1, _2); ++} ++// CHECK-LABEL: @vmod_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmod.hu(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8u16 vmod_hu(v8u16 _1, v8u16 _2) { ++ return __builtin_lsx_vmod_hu(_1, _2); ++} ++// CHECK-LABEL: @vmod_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmod.wu(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4u32 vmod_wu(v4u32 _1, v4u32 _2) { ++ return __builtin_lsx_vmod_wu(_1, _2); ++} ++// CHECK-LABEL: @vmod_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmod.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 
++// ++v2u64 vmod_du(v2u64 _1, v2u64 _2) { ++ return __builtin_lsx_vmod_du(_1, _2); ++} ++// CHECK-LABEL: @vreplve_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vreplve.b(<16 x i8> _1:%.*, i32 _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vreplve_b(v16i8 _1, int _2) { ++ return __builtin_lsx_vreplve_b(_1, _2); ++} ++// CHECK-LABEL: @vreplve_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vreplve.h(<8 x i16> _1:%.*, i32 _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vreplve_h(v8i16 _1, int _2) { ++ return __builtin_lsx_vreplve_h(_1, _2); ++} ++// CHECK-LABEL: @vreplve_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vreplve.w(<4 x i32> _1:%.*, i32 _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vreplve_w(v4i32 _1, int _2) { ++ return __builtin_lsx_vreplve_w(_1, _2); ++} ++// CHECK-LABEL: @vreplve_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vreplve.d(<2 x i64> _1:%.*, i32 _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vreplve_d(v2i64 _1, int _2) { ++ return __builtin_lsx_vreplve_d(_1, _2); ++} ++// CHECK-LABEL: @vreplvei_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vreplvei.b(<16 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vreplvei_b(v16i8 _1) { return __builtin_lsx_vreplvei_b(_1, 1); } ++// CHECK-LABEL: @vreplvei_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vreplvei.h(<8 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vreplvei_h(v8i16 _1) { return __builtin_lsx_vreplvei_h(_1, 1); } ++// CHECK-LABEL: @vreplvei_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vreplvei.w(<4 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vreplvei_w(v4i32 _1) { return __builtin_lsx_vreplvei_w(_1, 1); } ++// CHECK-LABEL: @vreplvei_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vreplvei.d(<2 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vreplvei_d(v2i64 _1) { return __builtin_lsx_vreplvei_d(_1, 1); } ++// CHECK-LABEL: @vpickev_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vpickev.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vpickev_b(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vpickev_b(_1, _2); ++} ++// CHECK-LABEL: @vpickev_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vpickev.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vpickev_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vpickev_h(_1, _2); ++} ++// CHECK-LABEL: @vpickev_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vpickev.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vpickev_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vpickev_w(_1, _2); ++} ++// CHECK-LABEL: @vpickev_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vpickev.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vpickev_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vpickev_d(_1, _2); ++} ++// CHECK-LABEL: @vpickod_b( ++// 
CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vpickod.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vpickod_b(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vpickod_b(_1, _2); ++} ++// CHECK-LABEL: @vpickod_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vpickod.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vpickod_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vpickod_h(_1, _2); ++} ++// CHECK-LABEL: @vpickod_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vpickod.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vpickod_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vpickod_w(_1, _2); ++} ++// CHECK-LABEL: @vpickod_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vpickod.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vpickod_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vpickod_d(_1, _2); ++} ++// CHECK-LABEL: @vilvh_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vilvh.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vilvh_b(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vilvh_b(_1, _2); ++} ++// CHECK-LABEL: @vilvh_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vilvh.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vilvh_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vilvh_h(_1, _2); ++} ++// CHECK-LABEL: @vilvh_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vilvh.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vilvh_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vilvh_w(_1, _2); ++} ++// CHECK-LABEL: @vilvh_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vilvh.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vilvh_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vilvh_d(_1, _2); ++} ++// CHECK-LABEL: @vilvl_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vilvl.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vilvl_b(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vilvl_b(_1, _2); ++} ++// CHECK-LABEL: @vilvl_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vilvl.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vilvl_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vilvl_h(_1, _2); ++} ++// CHECK-LABEL: @vilvl_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vilvl.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vilvl_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vilvl_w(_1, _2); ++} ++// CHECK-LABEL: @vilvl_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vilvl.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vilvl_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vilvl_d(_1, _2); ++} ++// CHECK-LABEL: @vpackev_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> 
@llvm.loongarch.lsx.vpackev.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vpackev_b(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vpackev_b(_1, _2); ++} ++// CHECK-LABEL: @vpackev_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vpackev.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vpackev_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vpackev_h(_1, _2); ++} ++// CHECK-LABEL: @vpackev_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vpackev.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vpackev_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vpackev_w(_1, _2); ++} ++// CHECK-LABEL: @vpackev_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vpackev.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vpackev_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vpackev_d(_1, _2); ++} ++// CHECK-LABEL: @vpackod_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vpackod.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vpackod_b(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vpackod_b(_1, _2); ++} ++// CHECK-LABEL: @vpackod_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vpackod.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vpackod_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vpackod_h(_1, _2); ++} ++// CHECK-LABEL: @vpackod_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vpackod.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vpackod_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vpackod_w(_1, _2); ++} ++// CHECK-LABEL: @vpackod_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vpackod.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vpackod_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vpackod_d(_1, _2); ++} ++// CHECK-LABEL: @vshuf_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vshuf.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*, <8 x i16> _3:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vshuf_h(v8i16 _1, v8i16 _2, v8i16 _3) { ++ return __builtin_lsx_vshuf_h(_1, _2, _3); ++} ++// CHECK-LABEL: @vshuf_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vshuf.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*, <4 x i32> _3:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vshuf_w(v4i32 _1, v4i32 _2, v4i32 _3) { ++ return __builtin_lsx_vshuf_w(_1, _2, _3); ++} ++// CHECK-LABEL: @vshuf_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vshuf.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*, <2 x i64> _3:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vshuf_d(v2i64 _1, v2i64 _2, v2i64 _3) { ++ return __builtin_lsx_vshuf_d(_1, _2, _3); ++} ++// CHECK-LABEL: @vand_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vand.v(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16u8 vand_v(v16u8 _1, v16u8 _2) { return __builtin_lsx_vand_v(_1, _2); } ++// CHECK-LABEL: @vandi_b( ++// CHECK-NEXT: 
entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vandi.b(<16 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16u8 vandi_b(v16u8 _1) { return __builtin_lsx_vandi_b(_1, 1); } ++// CHECK-LABEL: @vor_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vor.v(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16u8 vor_v(v16u8 _1, v16u8 _2) { return __builtin_lsx_vor_v(_1, _2); } ++// CHECK-LABEL: @vori_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vori.b(<16 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16u8 vori_b(v16u8 _1) { return __builtin_lsx_vori_b(_1, 1); } ++// CHECK-LABEL: @vnor_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vnor.v(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16u8 vnor_v(v16u8 _1, v16u8 _2) { return __builtin_lsx_vnor_v(_1, _2); } ++// CHECK-LABEL: @vnori_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vnori.b(<16 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16u8 vnori_b(v16u8 _1) { return __builtin_lsx_vnori_b(_1, 1); } ++// CHECK-LABEL: @vxor_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vxor.v(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16u8 vxor_v(v16u8 _1, v16u8 _2) { return __builtin_lsx_vxor_v(_1, _2); } ++// CHECK-LABEL: @vxori_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vxori.b(<16 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16u8 vxori_b(v16u8 _1) { return __builtin_lsx_vxori_b(_1, 1); } ++// CHECK-LABEL: @vbitsel_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vbitsel.v(<16 x i8> _1:%.*, <16 x i8> _2:%.*, <16 x i8> _3:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16u8 vbitsel_v(v16u8 _1, v16u8 _2, v16u8 _3) { ++ return __builtin_lsx_vbitsel_v(_1, _2, _3); ++} ++// CHECK-LABEL: @vbitseli_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vbitseli.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16u8 vbitseli_b(v16u8 _1, v16u8 _2) { ++ return __builtin_lsx_vbitseli_b(_1, _2, 1); ++} ++// CHECK-LABEL: @vshuf4i_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vshuf4i.b(<16 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vshuf4i_b(v16i8 _1) { return __builtin_lsx_vshuf4i_b(_1, 1); } ++// CHECK-LABEL: @vshuf4i_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vshuf4i.h(<8 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vshuf4i_h(v8i16 _1) { return __builtin_lsx_vshuf4i_h(_1, 1); } ++// CHECK-LABEL: @vshuf4i_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vshuf4i.w(<4 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vshuf4i_w(v4i32 _1) { return __builtin_lsx_vshuf4i_w(_1, 1); } ++// CHECK-LABEL: @vreplgr2vr_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vreplgr2vr.b(i32 _1:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vreplgr2vr_b(int _1) { return __builtin_lsx_vreplgr2vr_b(_1); } ++// CHECK-LABEL: 
@vreplgr2vr_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vreplgr2vr.h(i32 _1:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vreplgr2vr_h(int _1) { return __builtin_lsx_vreplgr2vr_h(_1); } ++// CHECK-LABEL: @vreplgr2vr_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vreplgr2vr.w(i32 _1:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vreplgr2vr_w(int _1) { return __builtin_lsx_vreplgr2vr_w(_1); } ++// CHECK-LABEL: @vreplgr2vr_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vreplgr2vr.d(i64 _1:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vreplgr2vr_d(long _1) { return __builtin_lsx_vreplgr2vr_d(_1); } ++// CHECK-LABEL: @vpcnt_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vpcnt.b(<16 x i8> _1:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vpcnt_b(v16i8 _1) { return __builtin_lsx_vpcnt_b(_1); } ++// CHECK-LABEL: @vpcnt_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vpcnt.h(<8 x i16> _1:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vpcnt_h(v8i16 _1) { return __builtin_lsx_vpcnt_h(_1); } ++// CHECK-LABEL: @vpcnt_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vpcnt.w(<4 x i32> _1:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vpcnt_w(v4i32 _1) { return __builtin_lsx_vpcnt_w(_1); } ++// CHECK-LABEL: @vpcnt_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vpcnt.d(<2 x i64> _1:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vpcnt_d(v2i64 _1) { return __builtin_lsx_vpcnt_d(_1); } ++// CHECK-LABEL: @vclo_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vclo.b(<16 x i8> _1:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vclo_b(v16i8 _1) { return __builtin_lsx_vclo_b(_1); } ++// CHECK-LABEL: @vclo_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vclo.h(<8 x i16> _1:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vclo_h(v8i16 _1) { return __builtin_lsx_vclo_h(_1); } ++// CHECK-LABEL: @vclo_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vclo.w(<4 x i32> _1:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vclo_w(v4i32 _1) { return __builtin_lsx_vclo_w(_1); } ++// CHECK-LABEL: @vclo_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vclo.d(<2 x i64> _1:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vclo_d(v2i64 _1) { return __builtin_lsx_vclo_d(_1); } ++// CHECK-LABEL: @vclz_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vclz.b(<16 x i8> _1:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vclz_b(v16i8 _1) { return __builtin_lsx_vclz_b(_1); } ++// CHECK-LABEL: @vclz_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vclz.h(<8 x i16> _1:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vclz_h(v8i16 _1) { return __builtin_lsx_vclz_h(_1); } ++// CHECK-LABEL: @vclz_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vclz.w(<4 x i32> _1:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vclz_w(v4i32 _1) { return __builtin_lsx_vclz_w(_1); } ++// CHECK-LABEL: @vclz_d( ++// CHECK-NEXT: 
entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vclz.d(<2 x i64> _1:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vclz_d(v2i64 _1) { return __builtin_lsx_vclz_d(_1); } ++// CHECK-LABEL: @vpickve2gr_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret i32 TMP0 ++// ++int vpickve2gr_b(v16i8 _1) { return __builtin_lsx_vpickve2gr_b(_1, 1); } ++// CHECK-LABEL: @vpickve2gr_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lsx.vpickve2gr.h(<8 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret i32 TMP0 ++// ++int vpickve2gr_h(v8i16 _1) { return __builtin_lsx_vpickve2gr_h(_1, 1); } ++// CHECK-LABEL: @vpickve2gr_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lsx.vpickve2gr.w(<4 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret i32 TMP0 ++// ++int vpickve2gr_w(v4i32 _1) { return __builtin_lsx_vpickve2gr_w(_1, 1); } ++// CHECK-LABEL: @vpickve2gr_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret i64 TMP0 ++// ++long vpickve2gr_d(v2i64 _1) { return __builtin_lsx_vpickve2gr_d(_1, 1); } ++// CHECK-LABEL: @vpickve2gr_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lsx.vpickve2gr.bu(<16 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret i32 TMP0 ++// ++unsigned int vpickve2gr_bu(v16i8 _1) { ++ return __builtin_lsx_vpickve2gr_bu(_1, 1); ++} ++// CHECK-LABEL: @vpickve2gr_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lsx.vpickve2gr.hu(<8 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret i32 TMP0 ++// ++unsigned int vpickve2gr_hu(v8i16 _1) { ++ return __builtin_lsx_vpickve2gr_hu(_1, 1); ++} ++// CHECK-LABEL: @vpickve2gr_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lsx.vpickve2gr.wu(<4 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret i32 TMP0 ++// ++unsigned int vpickve2gr_wu(v4i32 _1) { ++ return __builtin_lsx_vpickve2gr_wu(_1, 1); ++} ++// CHECK-LABEL: @vpickve2gr_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret i64 TMP0 ++// ++unsigned long int vpickve2gr_du(v2i64 _1) { ++ return __builtin_lsx_vpickve2gr_du(_1, 1); ++} ++// CHECK-LABEL: @vinsgr2vr_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b(<16 x i8> _1:%.*, i32 1, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vinsgr2vr_b(v16i8 _1) { ++ return __builtin_lsx_vinsgr2vr_b(_1, 1, 1); ++} ++// CHECK-LABEL: @vinsgr2vr_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vinsgr2vr.h(<8 x i16> _1:%.*, i32 1, i32 1) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vinsgr2vr_h(v8i16 _1) { ++ return __builtin_lsx_vinsgr2vr_h(_1, 1, 1); ++} ++// CHECK-LABEL: @vinsgr2vr_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vinsgr2vr.w(<4 x i32> _1:%.*, i32 1, i32 1) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vinsgr2vr_w(v4i32 _1) { ++ return __builtin_lsx_vinsgr2vr_w(_1, 1, 1); ++} ++// CHECK-LABEL: @vinsgr2vr_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d(<2 x i64> _1:%.*, i64 1, i32 1) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vinsgr2vr_d(v2i64 _1) { ++ return 
__builtin_lsx_vinsgr2vr_d(_1, 1, 1); ++} ++// CHECK-LABEL: @vfadd_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfadd.s(<4 x float> _1:%.*, <4 x float> _2:%.*) ++// CHECK-NEXT: ret <4 x float> TMP0 ++// ++v4f32 vfadd_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfadd_s(_1, _2); ++} ++// CHECK-LABEL: @vfadd_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfadd.d(<2 x double> _1:%.*, <2 x double> _2:%.*) ++// CHECK-NEXT: ret <2 x double> TMP0 ++// ++v2f64 vfadd_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfadd_d(_1, _2); ++} ++// CHECK-LABEL: @vfsub_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfsub.s(<4 x float> _1:%.*, <4 x float> _2:%.*) ++// CHECK-NEXT: ret <4 x float> TMP0 ++// ++v4f32 vfsub_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfsub_s(_1, _2); ++} ++// CHECK-LABEL: @vfsub_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfsub.d(<2 x double> _1:%.*, <2 x double> _2:%.*) ++// CHECK-NEXT: ret <2 x double> TMP0 ++// ++v2f64 vfsub_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfsub_d(_1, _2); ++} ++// CHECK-LABEL: @vfmul_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfmul.s(<4 x float> _1:%.*, <4 x float> _2:%.*) ++// CHECK-NEXT: ret <4 x float> TMP0 ++// ++v4f32 vfmul_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfmul_s(_1, _2); ++} ++// CHECK-LABEL: @vfmul_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfmul.d(<2 x double> _1:%.*, <2 x double> _2:%.*) ++// CHECK-NEXT: ret <2 x double> TMP0 ++// ++v2f64 vfmul_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfmul_d(_1, _2); ++} ++// CHECK-LABEL: @vfdiv_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfdiv.s(<4 x float> _1:%.*, <4 x float> _2:%.*) ++// CHECK-NEXT: ret <4 x float> TMP0 ++// ++v4f32 vfdiv_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfdiv_s(_1, _2); ++} ++// CHECK-LABEL: @vfdiv_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfdiv.d(<2 x double> _1:%.*, <2 x double> _2:%.*) ++// CHECK-NEXT: ret <2 x double> TMP0 ++// ++v2f64 vfdiv_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfdiv_d(_1, _2); ++} ++// CHECK-LABEL: @vfcvt_h_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vfcvt.h.s(<4 x float> _1:%.*, <4 x float> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vfcvt_h_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfcvt_h_s(_1, _2); ++} ++// CHECK-LABEL: @vfcvt_s_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfcvt.s.d(<2 x double> _1:%.*, <2 x double> _2:%.*) ++// CHECK-NEXT: ret <4 x float> TMP0 ++// ++v4f32 vfcvt_s_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfcvt_s_d(_1, _2); ++} ++// CHECK-LABEL: @vfmin_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfmin.s(<4 x float> _1:%.*, <4 x float> _2:%.*) ++// CHECK-NEXT: ret <4 x float> TMP0 ++// ++v4f32 vfmin_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfmin_s(_1, _2); ++} ++// CHECK-LABEL: @vfmin_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfmin.d(<2 x double> _1:%.*, <2 x double> _2:%.*) ++// CHECK-NEXT: ret <2 x double> TMP0 ++// ++v2f64 
vfmin_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfmin_d(_1, _2); ++} ++// CHECK-LABEL: @vfmina_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfmina.s(<4 x float> _1:%.*, <4 x float> _2:%.*) ++// CHECK-NEXT: ret <4 x float> TMP0 ++// ++v4f32 vfmina_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfmina_s(_1, _2); ++} ++// CHECK-LABEL: @vfmina_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfmina.d(<2 x double> _1:%.*, <2 x double> _2:%.*) ++// CHECK-NEXT: ret <2 x double> TMP0 ++// ++v2f64 vfmina_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfmina_d(_1, _2); ++} ++// CHECK-LABEL: @vfmax_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfmax.s(<4 x float> _1:%.*, <4 x float> _2:%.*) ++// CHECK-NEXT: ret <4 x float> TMP0 ++// ++v4f32 vfmax_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfmax_s(_1, _2); ++} ++// CHECK-LABEL: @vfmax_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfmax.d(<2 x double> _1:%.*, <2 x double> _2:%.*) ++// CHECK-NEXT: ret <2 x double> TMP0 ++// ++v2f64 vfmax_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfmax_d(_1, _2); ++} ++// CHECK-LABEL: @vfmaxa_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfmaxa.s(<4 x float> _1:%.*, <4 x float> _2:%.*) ++// CHECK-NEXT: ret <4 x float> TMP0 ++// ++v4f32 vfmaxa_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfmaxa_s(_1, _2); ++} ++// CHECK-LABEL: @vfmaxa_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfmaxa.d(<2 x double> _1:%.*, <2 x double> _2:%.*) ++// CHECK-NEXT: ret <2 x double> TMP0 ++// ++v2f64 vfmaxa_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfmaxa_d(_1, _2); ++} ++// CHECK-LABEL: @vfclass_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfclass.s(<4 x float> _1:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vfclass_s(v4f32 _1) { return __builtin_lsx_vfclass_s(_1); } ++// CHECK-LABEL: @vfclass_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfclass.d(<2 x double> _1:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vfclass_d(v2f64 _1) { return __builtin_lsx_vfclass_d(_1); } ++// CHECK-LABEL: @vfsqrt_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfsqrt.s(<4 x float> _1:%.*) ++// CHECK-NEXT: ret <4 x float> TMP0 ++// ++v4f32 vfsqrt_s(v4f32 _1) { return __builtin_lsx_vfsqrt_s(_1); } ++// CHECK-LABEL: @vfsqrt_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfsqrt.d(<2 x double> _1:%.*) ++// CHECK-NEXT: ret <2 x double> TMP0 ++// ++v2f64 vfsqrt_d(v2f64 _1) { return __builtin_lsx_vfsqrt_d(_1); } ++// CHECK-LABEL: @vfrecip_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfrecip.s(<4 x float> _1:%.*) ++// CHECK-NEXT: ret <4 x float> TMP0 ++// ++v4f32 vfrecip_s(v4f32 _1) { return __builtin_lsx_vfrecip_s(_1); } ++// CHECK-LABEL: @vfrecip_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfrecip.d(<2 x double> _1:%.*) ++// CHECK-NEXT: ret <2 x double> TMP0 ++// ++v2f64 vfrecip_d(v2f64 _1) { return __builtin_lsx_vfrecip_d(_1); } ++// CHECK-LABEL: @vfrint_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 
x float> @llvm.loongarch.lsx.vfrint.s(<4 x float> _1:%.*) ++// CHECK-NEXT: ret <4 x float> TMP0 ++// ++v4f32 vfrint_s(v4f32 _1) { return __builtin_lsx_vfrint_s(_1); } ++// CHECK-LABEL: @vfrint_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfrint.d(<2 x double> _1:%.*) ++// CHECK-NEXT: ret <2 x double> TMP0 ++// ++v2f64 vfrint_d(v2f64 _1) { return __builtin_lsx_vfrint_d(_1); } ++// CHECK-LABEL: @vfrsqrt_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfrsqrt.s(<4 x float> _1:%.*) ++// CHECK-NEXT: ret <4 x float> TMP0 ++// ++v4f32 vfrsqrt_s(v4f32 _1) { return __builtin_lsx_vfrsqrt_s(_1); } ++// CHECK-LABEL: @vfrsqrt_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfrsqrt.d(<2 x double> _1:%.*) ++// CHECK-NEXT: ret <2 x double> TMP0 ++// ++v2f64 vfrsqrt_d(v2f64 _1) { return __builtin_lsx_vfrsqrt_d(_1); } ++// CHECK-LABEL: @vflogb_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vflogb.s(<4 x float> _1:%.*) ++// CHECK-NEXT: ret <4 x float> TMP0 ++// ++v4f32 vflogb_s(v4f32 _1) { return __builtin_lsx_vflogb_s(_1); } ++// CHECK-LABEL: @vflogb_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vflogb.d(<2 x double> _1:%.*) ++// CHECK-NEXT: ret <2 x double> TMP0 ++// ++v2f64 vflogb_d(v2f64 _1) { return __builtin_lsx_vflogb_d(_1); } ++// CHECK-LABEL: @vfcvth_s_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfcvth.s.h(<8 x i16> _1:%.*) ++// CHECK-NEXT: ret <4 x float> TMP0 ++// ++v4f32 vfcvth_s_h(v8i16 _1) { return __builtin_lsx_vfcvth_s_h(_1); } ++// CHECK-LABEL: @vfcvth_d_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfcvth.d.s(<4 x float> _1:%.*) ++// CHECK-NEXT: ret <2 x double> TMP0 ++// ++v2f64 vfcvth_d_s(v4f32 _1) { return __builtin_lsx_vfcvth_d_s(_1); } ++// CHECK-LABEL: @vfcvtl_s_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfcvtl.s.h(<8 x i16> _1:%.*) ++// CHECK-NEXT: ret <4 x float> TMP0 ++// ++v4f32 vfcvtl_s_h(v8i16 _1) { return __builtin_lsx_vfcvtl_s_h(_1); } ++// CHECK-LABEL: @vfcvtl_d_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfcvtl.d.s(<4 x float> _1:%.*) ++// CHECK-NEXT: ret <2 x double> TMP0 ++// ++v2f64 vfcvtl_d_s(v4f32 _1) { return __builtin_lsx_vfcvtl_d_s(_1); } ++// CHECK-LABEL: @vftint_w_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vftint.w.s(<4 x float> _1:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vftint_w_s(v4f32 _1) { return __builtin_lsx_vftint_w_s(_1); } ++// CHECK-LABEL: @vftint_l_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftint.l.d(<2 x double> _1:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vftint_l_d(v2f64 _1) { return __builtin_lsx_vftint_l_d(_1); } ++// CHECK-LABEL: @vftint_wu_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vftint.wu.s(<4 x float> _1:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4u32 vftint_wu_s(v4f32 _1) { return __builtin_lsx_vftint_wu_s(_1); } ++// CHECK-LABEL: @vftint_lu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftint.lu.d(<2 x double> _1:%.*) ++// CHECK-NEXT: ret <2 x i64> 
TMP0 ++// ++v2u64 vftint_lu_d(v2f64 _1) { return __builtin_lsx_vftint_lu_d(_1); } ++// CHECK-LABEL: @vftintrz_w_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.s(<4 x float> _1:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vftintrz_w_s(v4f32 _1) { return __builtin_lsx_vftintrz_w_s(_1); } ++// CHECK-LABEL: @vftintrz_l_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftintrz.l.d(<2 x double> _1:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vftintrz_l_d(v2f64 _1) { return __builtin_lsx_vftintrz_l_d(_1); } ++// CHECK-LABEL: @vftintrz_wu_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.wu.s(<4 x float> _1:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4u32 vftintrz_wu_s(v4f32 _1) { return __builtin_lsx_vftintrz_wu_s(_1); } ++// CHECK-LABEL: @vftintrz_lu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftintrz.lu.d(<2 x double> _1:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2u64 vftintrz_lu_d(v2f64 _1) { return __builtin_lsx_vftintrz_lu_d(_1); } ++// CHECK-LABEL: @vffint_s_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.w(<4 x i32> _1:%.*) ++// CHECK-NEXT: ret <4 x float> TMP0 ++// ++v4f32 vffint_s_w(v4i32 _1) { return __builtin_lsx_vffint_s_w(_1); } ++// CHECK-LABEL: @vffint_d_l( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vffint.d.l(<2 x i64> _1:%.*) ++// CHECK-NEXT: ret <2 x double> TMP0 ++// ++v2f64 vffint_d_l(v2i64 _1) { return __builtin_lsx_vffint_d_l(_1); } ++// CHECK-LABEL: @vffint_s_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.wu(<4 x i32> _1:%.*) ++// CHECK-NEXT: ret <4 x float> TMP0 ++// ++v4f32 vffint_s_wu(v4u32 _1) { return __builtin_lsx_vffint_s_wu(_1); } ++// CHECK-LABEL: @vffint_d_lu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vffint.d.lu(<2 x i64> _1:%.*) ++// CHECK-NEXT: ret <2 x double> TMP0 ++// ++v2f64 vffint_d_lu(v2u64 _1) { return __builtin_lsx_vffint_d_lu(_1); } ++// CHECK-LABEL: @vandn_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vandn.v(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16u8 vandn_v(v16u8 _1, v16u8 _2) { ++ return __builtin_lsx_vandn_v(_1, _2); ++} ++// CHECK-LABEL: @vneg_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vneg.b(<16 x i8> _1:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vneg_b(v16i8 _1) { return __builtin_lsx_vneg_b(_1); } ++// CHECK-LABEL: @vneg_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vneg.h(<8 x i16> _1:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vneg_h(v8i16 _1) { return __builtin_lsx_vneg_h(_1); } ++// CHECK-LABEL: @vneg_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vneg.w(<4 x i32> _1:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vneg_w(v4i32 _1) { return __builtin_lsx_vneg_w(_1); } ++// CHECK-LABEL: @vneg_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vneg.d(<2 x i64> _1:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vneg_d(v2i64 _1) { return __builtin_lsx_vneg_d(_1); } ++// 
CHECK-LABEL: @vmuh_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmuh.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vmuh_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vmuh_b(_1, _2); } ++// CHECK-LABEL: @vmuh_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmuh.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vmuh_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vmuh_h(_1, _2); } ++// CHECK-LABEL: @vmuh_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmuh.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vmuh_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vmuh_w(_1, _2); } ++// CHECK-LABEL: @vmuh_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmuh.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vmuh_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vmuh_d(_1, _2); } ++// CHECK-LABEL: @vmuh_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmuh.bu(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16u8 vmuh_bu(v16u8 _1, v16u8 _2) { ++ return __builtin_lsx_vmuh_bu(_1, _2); ++} ++// CHECK-LABEL: @vmuh_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmuh.hu(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8u16 vmuh_hu(v8u16 _1, v8u16 _2) { ++ return __builtin_lsx_vmuh_hu(_1, _2); ++} ++// CHECK-LABEL: @vmuh_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmuh.wu(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4u32 vmuh_wu(v4u32 _1, v4u32 _2) { ++ return __builtin_lsx_vmuh_wu(_1, _2); ++} ++// CHECK-LABEL: @vmuh_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmuh.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2u64 vmuh_du(v2u64 _1, v2u64 _2) { ++ return __builtin_lsx_vmuh_du(_1, _2); ++} ++// CHECK-LABEL: @vsllwil_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b(<16 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vsllwil_h_b(v16i8 _1) { return __builtin_lsx_vsllwil_h_b(_1, 1); } ++// CHECK-LABEL: @vsllwil_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsllwil.w.h(<8 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vsllwil_w_h(v8i16 _1) { return __builtin_lsx_vsllwil_w_h(_1, 1); } ++// CHECK-LABEL: @vsllwil_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsllwil.d.w(<4 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vsllwil_d_w(v4i32 _1) { return __builtin_lsx_vsllwil_d_w(_1, 1); } ++// CHECK-LABEL: @vsllwil_hu_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsllwil.hu.bu(<16 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8u16 vsllwil_hu_bu(v16u8 _1) { ++ return __builtin_lsx_vsllwil_hu_bu(_1, 1); ++} ++// CHECK-LABEL: @vsllwil_wu_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsllwil.wu.hu(<8 x i16> _1:%.*, i32 1) ++// 
CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4u32 vsllwil_wu_hu(v8u16 _1) { ++ return __builtin_lsx_vsllwil_wu_hu(_1, 1); ++} ++// CHECK-LABEL: @vsllwil_du_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsllwil.du.wu(<4 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2u64 vsllwil_du_wu(v4u32 _1) { ++ return __builtin_lsx_vsllwil_du_wu(_1, 1); ++} ++// CHECK-LABEL: @vsran_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsran.b.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vsran_b_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vsran_b_h(_1, _2); ++} ++// CHECK-LABEL: @vsran_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsran.h.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vsran_h_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vsran_h_w(_1, _2); ++} ++// CHECK-LABEL: @vsran_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsran.w.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vsran_w_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vsran_w_d(_1, _2); ++} ++// CHECK-LABEL: @vssran_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssran.b.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vssran_b_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vssran_b_h(_1, _2); ++} ++// CHECK-LABEL: @vssran_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssran.h.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vssran_h_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vssran_h_w(_1, _2); ++} ++// CHECK-LABEL: @vssran_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssran.w.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vssran_w_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vssran_w_d(_1, _2); ++} ++// CHECK-LABEL: @vssran_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssran.bu.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16u8 vssran_bu_h(v8u16 _1, v8u16 _2) { ++ return __builtin_lsx_vssran_bu_h(_1, _2); ++} ++// CHECK-LABEL: @vssran_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssran.hu.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8u16 vssran_hu_w(v4u32 _1, v4u32 _2) { ++ return __builtin_lsx_vssran_hu_w(_1, _2); ++} ++// CHECK-LABEL: @vssran_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssran.wu.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4u32 vssran_wu_d(v2u64 _1, v2u64 _2) { ++ return __builtin_lsx_vssran_wu_d(_1, _2); ++} ++// CHECK-LABEL: @vsrarn_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsrarn.b.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vsrarn_b_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vsrarn_b_h(_1, _2); ++} ++// CHECK-LABEL: @vsrarn_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsrarn.h.w(<4 x i32> 
_1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vsrarn_h_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vsrarn_h_w(_1, _2); ++} ++// CHECK-LABEL: @vsrarn_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsrarn.w.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vsrarn_w_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vsrarn_w_d(_1, _2); ++} ++// CHECK-LABEL: @vssrarn_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssrarn.b.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vssrarn_b_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vssrarn_b_h(_1, _2); ++} ++// CHECK-LABEL: @vssrarn_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssrarn.h.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vssrarn_h_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vssrarn_h_w(_1, _2); ++} ++// CHECK-LABEL: @vssrarn_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssrarn.w.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vssrarn_w_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vssrarn_w_d(_1, _2); ++} ++// CHECK-LABEL: @vssrarn_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssrarn.bu.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16u8 vssrarn_bu_h(v8u16 _1, v8u16 _2) { ++ return __builtin_lsx_vssrarn_bu_h(_1, _2); ++} ++// CHECK-LABEL: @vssrarn_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssrarn.hu.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8u16 vssrarn_hu_w(v4u32 _1, v4u32 _2) { ++ return __builtin_lsx_vssrarn_hu_w(_1, _2); ++} ++// CHECK-LABEL: @vssrarn_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssrarn.wu.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4u32 vssrarn_wu_d(v2u64 _1, v2u64 _2) { ++ return __builtin_lsx_vssrarn_wu_d(_1, _2); ++} ++// CHECK-LABEL: @vsrln_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsrln.b.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vsrln_b_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vsrln_b_h(_1, _2); ++} ++// CHECK-LABEL: @vsrln_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsrln.h.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vsrln_h_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vsrln_h_w(_1, _2); ++} ++// CHECK-LABEL: @vsrln_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsrln.w.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vsrln_w_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vsrln_w_d(_1, _2); ++} ++// CHECK-LABEL: @vssrln_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssrln.bu.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16u8 vssrln_bu_h(v8u16 _1, v8u16 _2) { ++ return __builtin_lsx_vssrln_bu_h(_1, _2); ++} ++// CHECK-LABEL: @vssrln_hu_w( ++// CHECK-NEXT: entry: ++// 
CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssrln.hu.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8u16 vssrln_hu_w(v4u32 _1, v4u32 _2) { ++ return __builtin_lsx_vssrln_hu_w(_1, _2); ++} ++// CHECK-LABEL: @vssrln_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssrln.wu.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4u32 vssrln_wu_d(v2u64 _1, v2u64 _2) { ++ return __builtin_lsx_vssrln_wu_d(_1, _2); ++} ++// CHECK-LABEL: @vsrlrn_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsrlrn.b.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vsrlrn_b_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vsrlrn_b_h(_1, _2); ++} ++// CHECK-LABEL: @vsrlrn_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsrlrn.h.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vsrlrn_h_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vsrlrn_h_w(_1, _2); ++} ++// CHECK-LABEL: @vsrlrn_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsrlrn.w.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vsrlrn_w_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vsrlrn_w_d(_1, _2); ++} ++// CHECK-LABEL: @vssrlrn_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrn.bu.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16u8 vssrlrn_bu_h(v8u16 _1, v8u16 _2) { ++ return __builtin_lsx_vssrlrn_bu_h(_1, _2); ++} ++// CHECK-LABEL: @vssrlrn_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrn.hu.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8u16 vssrlrn_hu_w(v4u32 _1, v4u32 _2) { ++ return __builtin_lsx_vssrlrn_hu_w(_1, _2); ++} ++// CHECK-LABEL: @vssrlrn_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrn.wu.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4u32 vssrlrn_wu_d(v2u64 _1, v2u64 _2) { ++ return __builtin_lsx_vssrlrn_wu_d(_1, _2); ++} ++// CHECK-LABEL: @vfrstpi_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vfrstpi.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vfrstpi_b(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vfrstpi_b(_1, _2, 1); ++} ++// CHECK-LABEL: @vfrstpi_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vfrstpi.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vfrstpi_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vfrstpi_h(_1, _2, 1); ++} ++// CHECK-LABEL: @vfrstp_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vfrstp.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*, <16 x i8> _3:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vfrstp_b(v16i8 _1, v16i8 _2, v16i8 _3) { ++ return __builtin_lsx_vfrstp_b(_1, _2, _3); ++} ++// CHECK-LABEL: @vfrstp_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vfrstp.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*, <8 x i16> _3:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vfrstp_h(v8i16 
_1, v8i16 _2, v8i16 _3) { ++ return __builtin_lsx_vfrstp_h(_1, _2, _3); ++} ++// CHECK-LABEL: @vshuf4i_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vshuf4i.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*, i32 1) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vshuf4i_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vshuf4i_d(_1, _2, 1); ++} ++// CHECK-LABEL: @vbsrl_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vbsrl.v(<16 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vbsrl_v(v16i8 _1) { return __builtin_lsx_vbsrl_v(_1, 1); } ++// CHECK-LABEL: @vbsll_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vbsll.v(<16 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vbsll_v(v16i8 _1) { return __builtin_lsx_vbsll_v(_1, 1); } ++// CHECK-LABEL: @vextrins_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vextrins.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vextrins_b(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vextrins_b(_1, _2, 1); ++} ++// CHECK-LABEL: @vextrins_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vextrins.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vextrins_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vextrins_h(_1, _2, 1); ++} ++// CHECK-LABEL: @vextrins_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vextrins.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vextrins_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vextrins_w(_1, _2, 1); ++} ++// CHECK-LABEL: @vextrins_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vextrins.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*, i32 1) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vextrins_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vextrins_d(_1, _2, 1); ++} ++// CHECK-LABEL: @vmskltz_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmskltz.b(<16 x i8> _1:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vmskltz_b(v16i8 _1) { return __builtin_lsx_vmskltz_b(_1); } ++// CHECK-LABEL: @vmskltz_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmskltz.h(<8 x i16> _1:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vmskltz_h(v8i16 _1) { return __builtin_lsx_vmskltz_h(_1); } ++// CHECK-LABEL: @vmskltz_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmskltz.w(<4 x i32> _1:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vmskltz_w(v4i32 _1) { return __builtin_lsx_vmskltz_w(_1); } ++// CHECK-LABEL: @vmskltz_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmskltz.d(<2 x i64> _1:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vmskltz_d(v2i64 _1) { return __builtin_lsx_vmskltz_d(_1); } ++// CHECK-LABEL: @vsigncov_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsigncov.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vsigncov_b(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vsigncov_b(_1, _2); ++} ++// CHECK-LABEL: @vsigncov_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: 
TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsigncov.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vsigncov_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vsigncov_h(_1, _2); ++} ++// CHECK-LABEL: @vsigncov_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsigncov.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vsigncov_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vsigncov_w(_1, _2); ++} ++// CHECK-LABEL: @vsigncov_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsigncov.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vsigncov_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vsigncov_d(_1, _2); ++} ++// CHECK-LABEL: @vfmadd_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfmadd.s(<4 x float> _1:%.*, <4 x float> _2:%.*, <4 x float> _3:%.*) ++// CHECK-NEXT: ret <4 x float> TMP0 ++// ++v4f32 vfmadd_s(v4f32 _1, v4f32 _2, v4f32 _3) { ++ return __builtin_lsx_vfmadd_s(_1, _2, _3); ++} ++// CHECK-LABEL: @vfmadd_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfmadd.d(<2 x double> _1:%.*, <2 x double> _2:%.*, <2 x double> _3:%.*) ++// CHECK-NEXT: ret <2 x double> TMP0 ++// ++v2f64 vfmadd_d(v2f64 _1, v2f64 _2, v2f64 _3) { ++ return __builtin_lsx_vfmadd_d(_1, _2, _3); ++} ++// CHECK-LABEL: @vfmsub_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfmsub.s(<4 x float> _1:%.*, <4 x float> _2:%.*, <4 x float> _3:%.*) ++// CHECK-NEXT: ret <4 x float> TMP0 ++// ++v4f32 vfmsub_s(v4f32 _1, v4f32 _2, v4f32 _3) { ++ return __builtin_lsx_vfmsub_s(_1, _2, _3); ++} ++// CHECK-LABEL: @vfmsub_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfmsub.d(<2 x double> _1:%.*, <2 x double> _2:%.*, <2 x double> _3:%.*) ++// CHECK-NEXT: ret <2 x double> TMP0 ++// ++v2f64 vfmsub_d(v2f64 _1, v2f64 _2, v2f64 _3) { ++ return __builtin_lsx_vfmsub_d(_1, _2, _3); ++} ++// CHECK-LABEL: @vfnmadd_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfnmadd.s(<4 x float> _1:%.*, <4 x float> _2:%.*, <4 x float> _3:%.*) ++// CHECK-NEXT: ret <4 x float> TMP0 ++// ++v4f32 vfnmadd_s(v4f32 _1, v4f32 _2, v4f32 _3) { ++ return __builtin_lsx_vfnmadd_s(_1, _2, _3); ++} ++// CHECK-LABEL: @vfnmadd_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfnmadd.d(<2 x double> _1:%.*, <2 x double> _2:%.*, <2 x double> _3:%.*) ++// CHECK-NEXT: ret <2 x double> TMP0 ++// ++v2f64 vfnmadd_d(v2f64 _1, v2f64 _2, v2f64 _3) { ++ return __builtin_lsx_vfnmadd_d(_1, _2, _3); ++} ++// CHECK-LABEL: @vfnmsub_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfnmsub.s(<4 x float> _1:%.*, <4 x float> _2:%.*, <4 x float> _3:%.*) ++// CHECK-NEXT: ret <4 x float> TMP0 ++// ++v4f32 vfnmsub_s(v4f32 _1, v4f32 _2, v4f32 _3) { ++ return __builtin_lsx_vfnmsub_s(_1, _2, _3); ++} ++// CHECK-LABEL: @vfnmsub_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfnmsub.d(<2 x double> _1:%.*, <2 x double> _2:%.*, <2 x double> _3:%.*) ++// CHECK-NEXT: ret <2 x double> TMP0 ++// ++v2f64 vfnmsub_d(v2f64 _1, v2f64 _2, v2f64 _3) { ++ return __builtin_lsx_vfnmsub_d(_1, _2, _3); ++} ++// CHECK-LABEL: 
@vftintrne_w_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.s(<4 x float> _1:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vftintrne_w_s(v4f32 _1) { return __builtin_lsx_vftintrne_w_s(_1); } ++// CHECK-LABEL: @vftintrne_l_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftintrne.l.d(<2 x double> _1:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vftintrne_l_d(v2f64 _1) { return __builtin_lsx_vftintrne_l_d(_1); } ++// CHECK-LABEL: @vftintrp_w_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.s(<4 x float> _1:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vftintrp_w_s(v4f32 _1) { return __builtin_lsx_vftintrp_w_s(_1); } ++// CHECK-LABEL: @vftintrp_l_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftintrp.l.d(<2 x double> _1:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vftintrp_l_d(v2f64 _1) { return __builtin_lsx_vftintrp_l_d(_1); } ++// CHECK-LABEL: @vftintrm_w_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.s(<4 x float> _1:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vftintrm_w_s(v4f32 _1) { return __builtin_lsx_vftintrm_w_s(_1); } ++// CHECK-LABEL: @vftintrm_l_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftintrm.l.d(<2 x double> _1:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vftintrm_l_d(v2f64 _1) { return __builtin_lsx_vftintrm_l_d(_1); } ++// CHECK-LABEL: @vftint_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vftint.w.d(<2 x double> _1:%.*, <2 x double> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vftint_w_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vftint_w_d(_1, _2); ++} ++// CHECK-LABEL: @vffint_s_l( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.l(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x float> TMP0 ++// ++v4f32 vffint_s_l(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vffint_s_l(_1, _2); ++} ++// CHECK-LABEL: @vftintrz_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.d(<2 x double> _1:%.*, <2 x double> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vftintrz_w_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vftintrz_w_d(_1, _2); ++} ++// CHECK-LABEL: @vftintrp_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.d(<2 x double> _1:%.*, <2 x double> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vftintrp_w_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vftintrp_w_d(_1, _2); ++} ++// CHECK-LABEL: @vftintrm_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.d(<2 x double> _1:%.*, <2 x double> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vftintrm_w_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vftintrm_w_d(_1, _2); ++} ++// CHECK-LABEL: @vftintrne_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.d(<2 x double> _1:%.*, <2 x double> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vftintrne_w_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vftintrne_w_d(_1, _2); ++} ++// CHECK-LABEL: @vftintl_l_s( ++// CHECK-NEXT: 
entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftintl.l.s(<4 x float> _1:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vftintl_l_s(v4f32 _1) { return __builtin_lsx_vftintl_l_s(_1); } ++// CHECK-LABEL: @vftinth_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftinth.l.s(<4 x float> _1:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vftinth_l_s(v4f32 _1) { return __builtin_lsx_vftinth_l_s(_1); } ++// CHECK-LABEL: @vffinth_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vffinth.d.w(<4 x i32> _1:%.*) ++// CHECK-NEXT: ret <2 x double> TMP0 ++// ++v2f64 vffinth_d_w(v4i32 _1) { return __builtin_lsx_vffinth_d_w(_1); } ++// CHECK-LABEL: @vffintl_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vffintl.d.w(<4 x i32> _1:%.*) ++// CHECK-NEXT: ret <2 x double> TMP0 ++// ++v2f64 vffintl_d_w(v4i32 _1) { return __builtin_lsx_vffintl_d_w(_1); } ++// CHECK-LABEL: @vftintrzl_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftintrzl.l.s(<4 x float> _1:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vftintrzl_l_s(v4f32 _1) { return __builtin_lsx_vftintrzl_l_s(_1); } ++// CHECK-LABEL: @vftintrzh_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftintrzh.l.s(<4 x float> _1:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vftintrzh_l_s(v4f32 _1) { return __builtin_lsx_vftintrzh_l_s(_1); } ++// CHECK-LABEL: @vftintrpl_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftintrpl.l.s(<4 x float> _1:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vftintrpl_l_s(v4f32 _1) { return __builtin_lsx_vftintrpl_l_s(_1); } ++// CHECK-LABEL: @vftintrph_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftintrph.l.s(<4 x float> _1:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vftintrph_l_s(v4f32 _1) { return __builtin_lsx_vftintrph_l_s(_1); } ++// CHECK-LABEL: @vftintrml_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftintrml.l.s(<4 x float> _1:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vftintrml_l_s(v4f32 _1) { return __builtin_lsx_vftintrml_l_s(_1); } ++// CHECK-LABEL: @vftintrmh_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftintrmh.l.s(<4 x float> _1:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vftintrmh_l_s(v4f32 _1) { return __builtin_lsx_vftintrmh_l_s(_1); } ++// CHECK-LABEL: @vftintrnel_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftintrnel.l.s(<4 x float> _1:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vftintrnel_l_s(v4f32 _1) { ++ return __builtin_lsx_vftintrnel_l_s(_1); ++} ++// CHECK-LABEL: @vftintrneh_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftintrneh.l.s(<4 x float> _1:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vftintrneh_l_s(v4f32 _1) { ++ return __builtin_lsx_vftintrneh_l_s(_1); ++} ++// CHECK-LABEL: @vfrintrne_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfrintrne.s(<4 x float> _1:%.*) ++// CHECK-NEXT: TMP1:%.* = bitcast <4 x float> TMP0 to <4 x i32> ++// CHECK-NEXT: ret <4 x i32> TMP1 ++// ++v4i32 
vfrintrne_s(v4f32 _1) { return __builtin_lsx_vfrintrne_s(_1); } ++// CHECK-LABEL: @vfrintrne_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfrintrne.d(<2 x double> _1:%.*) ++// CHECK-NEXT: TMP1:%.* = bitcast <2 x double> TMP0 to <2 x i64> ++// CHECK-NEXT: ret <2 x i64> TMP1 ++// ++v2i64 vfrintrne_d(v2f64 _1) { return __builtin_lsx_vfrintrne_d(_1); } ++// CHECK-LABEL: @vfrintrz_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfrintrz.s(<4 x float> _1:%.*) ++// CHECK-NEXT: TMP1:%.* = bitcast <4 x float> TMP0 to <4 x i32> ++// CHECK-NEXT: ret <4 x i32> TMP1 ++// ++v4i32 vfrintrz_s(v4f32 _1) { return __builtin_lsx_vfrintrz_s(_1); } ++// CHECK-LABEL: @vfrintrz_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfrintrz.d(<2 x double> _1:%.*) ++// CHECK-NEXT: TMP1:%.* = bitcast <2 x double> TMP0 to <2 x i64> ++// CHECK-NEXT: ret <2 x i64> TMP1 ++// ++v2i64 vfrintrz_d(v2f64 _1) { return __builtin_lsx_vfrintrz_d(_1); } ++// CHECK-LABEL: @vfrintrp_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfrintrp.s(<4 x float> _1:%.*) ++// CHECK-NEXT: TMP1:%.* = bitcast <4 x float> TMP0 to <4 x i32> ++// CHECK-NEXT: ret <4 x i32> TMP1 ++// ++v4i32 vfrintrp_s(v4f32 _1) { return __builtin_lsx_vfrintrp_s(_1); } ++// CHECK-LABEL: @vfrintrp_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfrintrp.d(<2 x double> _1:%.*) ++// CHECK-NEXT: TMP1:%.* = bitcast <2 x double> TMP0 to <2 x i64> ++// CHECK-NEXT: ret <2 x i64> TMP1 ++// ++v2i64 vfrintrp_d(v2f64 _1) { return __builtin_lsx_vfrintrp_d(_1); } ++// CHECK-LABEL: @vfrintrm_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfrintrm.s(<4 x float> _1:%.*) ++// CHECK-NEXT: TMP1:%.* = bitcast <4 x float> TMP0 to <4 x i32> ++// CHECK-NEXT: ret <4 x i32> TMP1 ++// ++v4i32 vfrintrm_s(v4f32 _1) { return __builtin_lsx_vfrintrm_s(_1); } ++// CHECK-LABEL: @vfrintrm_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfrintrm.d(<2 x double> _1:%.*) ++// CHECK-NEXT: TMP1:%.* = bitcast <2 x double> TMP0 to <2 x i64> ++// CHECK-NEXT: ret <2 x i64> TMP1 ++// ++v2i64 vfrintrm_d(v2f64 _1) { return __builtin_lsx_vfrintrm_d(_1); } ++// CHECK-LABEL: @vstelm_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.b(<16 x i8> _1:%.*, ptr _2:%.*, i32 1, i32 1) ++// CHECK-NEXT: ret void ++// ++void vstelm_b(v16i8 _1, void *_2) { ++ return __builtin_lsx_vstelm_b(_1, _2, 1, 1); ++} ++// CHECK-LABEL: @vstelm_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.h(<8 x i16> _1:%.*, ptr _2:%.*, i32 2, i32 1) ++// CHECK-NEXT: ret void ++// ++void vstelm_h(v8i16 _1, void *_2) { ++ return __builtin_lsx_vstelm_h(_1, _2, 2, 1); ++} ++// CHECK-LABEL: @vstelm_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.w(<4 x i32> _1:%.*, ptr _2:%.*, i32 4, i32 1) ++// CHECK-NEXT: ret void ++// ++void vstelm_w(v4i32 _1, void *_2) { ++ return __builtin_lsx_vstelm_w(_1, _2, 4, 1); ++} ++// CHECK-LABEL: @vstelm_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.d(<2 x i64> _1:%.*, ptr _2:%.*, i32 8, i32 1) ++// CHECK-NEXT: ret void ++// ++void vstelm_d(v2i64 _1, void *_2) { ++ return __builtin_lsx_vstelm_d(_1, _2, 8, 1); ++} ++// CHECK-LABEL: 
@vaddwev_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vaddwev_d_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vaddwev_d_w(_1, _2); ++} ++// CHECK-LABEL: @vaddwev_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vaddwev_w_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vaddwev_w_h(_1, _2); ++} ++// CHECK-LABEL: @vaddwev_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vaddwev_h_b(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vaddwev_h_b(_1, _2); ++} ++// CHECK-LABEL: @vaddwod_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vaddwod_d_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vaddwod_d_w(_1, _2); ++} ++// CHECK-LABEL: @vaddwod_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vaddwod_w_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vaddwod_w_h(_1, _2); ++} ++// CHECK-LABEL: @vaddwod_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vaddwod_h_b(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vaddwod_h_b(_1, _2); ++} ++// CHECK-LABEL: @vaddwev_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vaddwev_d_wu(v4u32 _1, v4u32 _2) { ++ return __builtin_lsx_vaddwev_d_wu(_1, _2); ++} ++// CHECK-LABEL: @vaddwev_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vaddwev_w_hu(v8u16 _1, v8u16 _2) { ++ return __builtin_lsx_vaddwev_w_hu(_1, _2); ++} ++// CHECK-LABEL: @vaddwev_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vaddwev_h_bu(v16u8 _1, v16u8 _2) { ++ return __builtin_lsx_vaddwev_h_bu(_1, _2); ++} ++// CHECK-LABEL: @vaddwod_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vaddwod_d_wu(v4u32 _1, v4u32 _2) { ++ return __builtin_lsx_vaddwod_d_wu(_1, _2); ++} ++// CHECK-LABEL: @vaddwod_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vaddwod_w_hu(v8u16 _1, v8u16 _2) { ++ return __builtin_lsx_vaddwod_w_hu(_1, _2); ++} ++// CHECK-LABEL: @vaddwod_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> 
TMP0 ++// ++v8i16 vaddwod_h_bu(v16u8 _1, v16u8 _2) { ++ return __builtin_lsx_vaddwod_h_bu(_1, _2); ++} ++// CHECK-LABEL: @vaddwev_d_wu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vaddwev_d_wu_w(v4u32 _1, v4i32 _2) { ++ return __builtin_lsx_vaddwev_d_wu_w(_1, _2); ++} ++// CHECK-LABEL: @vaddwev_w_hu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vaddwev_w_hu_h(v8u16 _1, v8i16 _2) { ++ return __builtin_lsx_vaddwev_w_hu_h(_1, _2); ++} ++// CHECK-LABEL: @vaddwev_h_bu_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vaddwev_h_bu_b(v16u8 _1, v16i8 _2) { ++ return __builtin_lsx_vaddwev_h_bu_b(_1, _2); ++} ++// CHECK-LABEL: @vaddwod_d_wu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vaddwod_d_wu_w(v4u32 _1, v4i32 _2) { ++ return __builtin_lsx_vaddwod_d_wu_w(_1, _2); ++} ++// CHECK-LABEL: @vaddwod_w_hu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vaddwod_w_hu_h(v8u16 _1, v8i16 _2) { ++ return __builtin_lsx_vaddwod_w_hu_h(_1, _2); ++} ++// CHECK-LABEL: @vaddwod_h_bu_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vaddwod_h_bu_b(v16u8 _1, v16i8 _2) { ++ return __builtin_lsx_vaddwod_h_bu_b(_1, _2); ++} ++// CHECK-LABEL: @vsubwev_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vsubwev_d_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vsubwev_d_w(_1, _2); ++} ++// CHECK-LABEL: @vsubwev_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vsubwev_w_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vsubwev_w_h(_1, _2); ++} ++// CHECK-LABEL: @vsubwev_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vsubwev_h_b(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vsubwev_h_b(_1, _2); ++} ++// CHECK-LABEL: @vsubwod_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vsubwod_d_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vsubwod_d_w(_1, _2); ++} ++// CHECK-LABEL: @vsubwod_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vsubwod_w_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vsubwod_w_h(_1, _2); ++} ++// CHECK-LABEL: @vsubwod_h_b( 
++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vsubwod_h_b(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vsubwod_h_b(_1, _2); ++} ++// CHECK-LABEL: @vsubwev_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.wu(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vsubwev_d_wu(v4u32 _1, v4u32 _2) { ++ return __builtin_lsx_vsubwev_d_wu(_1, _2); ++} ++// CHECK-LABEL: @vsubwev_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.hu(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vsubwev_w_hu(v8u16 _1, v8u16 _2) { ++ return __builtin_lsx_vsubwev_w_hu(_1, _2); ++} ++// CHECK-LABEL: @vsubwev_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.bu(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vsubwev_h_bu(v16u8 _1, v16u8 _2) { ++ return __builtin_lsx_vsubwev_h_bu(_1, _2); ++} ++// CHECK-LABEL: @vsubwod_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.wu(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vsubwod_d_wu(v4u32 _1, v4u32 _2) { ++ return __builtin_lsx_vsubwod_d_wu(_1, _2); ++} ++// CHECK-LABEL: @vsubwod_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.hu(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vsubwod_w_hu(v8u16 _1, v8u16 _2) { ++ return __builtin_lsx_vsubwod_w_hu(_1, _2); ++} ++// CHECK-LABEL: @vsubwod_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.bu(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vsubwod_h_bu(v16u8 _1, v16u8 _2) { ++ return __builtin_lsx_vsubwod_h_bu(_1, _2); ++} ++// CHECK-LABEL: @vaddwev_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vaddwev_q_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vaddwev_q_d(_1, _2); ++} ++// CHECK-LABEL: @vaddwod_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vaddwod_q_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vaddwod_q_d(_1, _2); ++} ++// CHECK-LABEL: @vaddwev_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vaddwev_q_du(v2u64 _1, v2u64 _2) { ++ return __builtin_lsx_vaddwev_q_du(_1, _2); ++} ++// CHECK-LABEL: @vaddwod_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vaddwod_q_du(v2u64 _1, v2u64 _2) { ++ return __builtin_lsx_vaddwod_q_du(_1, _2); ++} ++// CHECK-LABEL: @vsubwev_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 
++// ++v2i64 vsubwev_q_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vsubwev_q_d(_1, _2); ++} ++// CHECK-LABEL: @vsubwod_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vsubwod_q_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vsubwod_q_d(_1, _2); ++} ++// CHECK-LABEL: @vsubwev_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vsubwev_q_du(v2u64 _1, v2u64 _2) { ++ return __builtin_lsx_vsubwev_q_du(_1, _2); ++} ++// CHECK-LABEL: @vsubwod_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vsubwod_q_du(v2u64 _1, v2u64 _2) { ++ return __builtin_lsx_vsubwod_q_du(_1, _2); ++} ++// CHECK-LABEL: @vaddwev_q_du_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vaddwev_q_du_d(v2u64 _1, v2i64 _2) { ++ return __builtin_lsx_vaddwev_q_du_d(_1, _2); ++} ++// CHECK-LABEL: @vaddwod_q_du_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vaddwod_q_du_d(v2u64 _1, v2i64 _2) { ++ return __builtin_lsx_vaddwod_q_du_d(_1, _2); ++} ++// CHECK-LABEL: @vmulwev_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vmulwev_d_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vmulwev_d_w(_1, _2); ++} ++// CHECK-LABEL: @vmulwev_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vmulwev_w_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vmulwev_w_h(_1, _2); ++} ++// CHECK-LABEL: @vmulwev_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vmulwev_h_b(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vmulwev_h_b(_1, _2); ++} ++// CHECK-LABEL: @vmulwod_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vmulwod_d_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vmulwod_d_w(_1, _2); ++} ++// CHECK-LABEL: @vmulwod_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vmulwod_w_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vmulwod_w_h(_1, _2); ++} ++// CHECK-LABEL: @vmulwod_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vmulwod_h_b(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vmulwod_h_b(_1, _2); ++} ++// CHECK-LABEL: @vmulwev_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* 
= tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vmulwev_d_wu(v4u32 _1, v4u32 _2) { ++ return __builtin_lsx_vmulwev_d_wu(_1, _2); ++} ++// CHECK-LABEL: @vmulwev_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vmulwev_w_hu(v8u16 _1, v8u16 _2) { ++ return __builtin_lsx_vmulwev_w_hu(_1, _2); ++} ++// CHECK-LABEL: @vmulwev_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vmulwev_h_bu(v16u8 _1, v16u8 _2) { ++ return __builtin_lsx_vmulwev_h_bu(_1, _2); ++} ++// CHECK-LABEL: @vmulwod_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vmulwod_d_wu(v4u32 _1, v4u32 _2) { ++ return __builtin_lsx_vmulwod_d_wu(_1, _2); ++} ++// CHECK-LABEL: @vmulwod_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vmulwod_w_hu(v8u16 _1, v8u16 _2) { ++ return __builtin_lsx_vmulwod_w_hu(_1, _2); ++} ++// CHECK-LABEL: @vmulwod_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vmulwod_h_bu(v16u8 _1, v16u8 _2) { ++ return __builtin_lsx_vmulwod_h_bu(_1, _2); ++} ++// CHECK-LABEL: @vmulwev_d_wu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vmulwev_d_wu_w(v4u32 _1, v4i32 _2) { ++ return __builtin_lsx_vmulwev_d_wu_w(_1, _2); ++} ++// CHECK-LABEL: @vmulwev_w_hu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vmulwev_w_hu_h(v8u16 _1, v8i16 _2) { ++ return __builtin_lsx_vmulwev_w_hu_h(_1, _2); ++} ++// CHECK-LABEL: @vmulwev_h_bu_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vmulwev_h_bu_b(v16u8 _1, v16i8 _2) { ++ return __builtin_lsx_vmulwev_h_bu_b(_1, _2); ++} ++// CHECK-LABEL: @vmulwod_d_wu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vmulwod_d_wu_w(v4u32 _1, v4i32 _2) { ++ return __builtin_lsx_vmulwod_d_wu_w(_1, _2); ++} ++// CHECK-LABEL: @vmulwod_w_hu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vmulwod_w_hu_h(v8u16 _1, v8i16 _2) { ++ return __builtin_lsx_vmulwod_w_hu_h(_1, _2); ++} ++// CHECK-LABEL: @vmulwod_h_bu_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> 
TMP0 ++// ++v8i16 vmulwod_h_bu_b(v16u8 _1, v16i8 _2) { ++ return __builtin_lsx_vmulwod_h_bu_b(_1, _2); ++} ++// CHECK-LABEL: @vmulwev_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vmulwev_q_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vmulwev_q_d(_1, _2); ++} ++// CHECK-LABEL: @vmulwod_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vmulwod_q_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vmulwod_q_d(_1, _2); ++} ++// CHECK-LABEL: @vmulwev_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vmulwev_q_du(v2u64 _1, v2u64 _2) { ++ return __builtin_lsx_vmulwev_q_du(_1, _2); ++} ++// CHECK-LABEL: @vmulwod_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vmulwod_q_du(v2u64 _1, v2u64 _2) { ++ return __builtin_lsx_vmulwod_q_du(_1, _2); ++} ++// CHECK-LABEL: @vmulwev_q_du_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vmulwev_q_du_d(v2u64 _1, v2i64 _2) { ++ return __builtin_lsx_vmulwev_q_du_d(_1, _2); ++} ++// CHECK-LABEL: @vmulwod_q_du_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vmulwod_q_du_d(v2u64 _1, v2i64 _2) { ++ return __builtin_lsx_vmulwod_q_du_d(_1, _2); ++} ++// CHECK-LABEL: @vhaddw_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.q.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vhaddw_q_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vhaddw_q_d(_1, _2); ++} ++// CHECK-LABEL: @vhaddw_qu_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.qu.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2u64 vhaddw_qu_du(v2u64 _1, v2u64 _2) { ++ return __builtin_lsx_vhaddw_qu_du(_1, _2); ++} ++// CHECK-LABEL: @vhsubw_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.q.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vhsubw_q_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vhsubw_q_d(_1, _2); ++} ++// CHECK-LABEL: @vhsubw_qu_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.qu.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2u64 vhsubw_qu_du(v2u64 _1, v2u64 _2) { ++ return __builtin_lsx_vhsubw_qu_du(_1, _2); ++} ++// CHECK-LABEL: @vmaddwev_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.w(<2 x i64> _1:%.*, <4 x i32> _2:%.*, <4 x i32> _3:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vmaddwev_d_w(v2i64 _1, v4i32 _2, v4i32 _3) { ++ return __builtin_lsx_vmaddwev_d_w(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwev_w_h( 
++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.h(<4 x i32> _1:%.*, <8 x i16> _2:%.*, <8 x i16> _3:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vmaddwev_w_h(v4i32 _1, v8i16 _2, v8i16 _3) { ++ return __builtin_lsx_vmaddwev_w_h(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwev_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.b(<8 x i16> _1:%.*, <16 x i8> _2:%.*, <16 x i8> _3:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vmaddwev_h_b(v8i16 _1, v16i8 _2, v16i8 _3) { ++ return __builtin_lsx_vmaddwev_h_b(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwev_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu(<2 x i64> _1:%.*, <4 x i32> _2:%.*, <4 x i32> _3:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2u64 vmaddwev_d_wu(v2u64 _1, v4u32 _2, v4u32 _3) { ++ return __builtin_lsx_vmaddwev_d_wu(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwev_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu(<4 x i32> _1:%.*, <8 x i16> _2:%.*, <8 x i16> _3:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4u32 vmaddwev_w_hu(v4u32 _1, v8u16 _2, v8u16 _3) { ++ return __builtin_lsx_vmaddwev_w_hu(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwev_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu(<8 x i16> _1:%.*, <16 x i8> _2:%.*, <16 x i8> _3:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8u16 vmaddwev_h_bu(v8u16 _1, v16u8 _2, v16u8 _3) { ++ return __builtin_lsx_vmaddwev_h_bu(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwod_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.w(<2 x i64> _1:%.*, <4 x i32> _2:%.*, <4 x i32> _3:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vmaddwod_d_w(v2i64 _1, v4i32 _2, v4i32 _3) { ++ return __builtin_lsx_vmaddwod_d_w(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwod_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.h(<4 x i32> _1:%.*, <8 x i16> _2:%.*, <8 x i16> _3:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vmaddwod_w_h(v4i32 _1, v8i16 _2, v8i16 _3) { ++ return __builtin_lsx_vmaddwod_w_h(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwod_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.b(<8 x i16> _1:%.*, <16 x i8> _2:%.*, <16 x i8> _3:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vmaddwod_h_b(v8i16 _1, v16i8 _2, v16i8 _3) { ++ return __builtin_lsx_vmaddwod_h_b(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwod_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu(<2 x i64> _1:%.*, <4 x i32> _2:%.*, <4 x i32> _3:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2u64 vmaddwod_d_wu(v2u64 _1, v4u32 _2, v4u32 _3) { ++ return __builtin_lsx_vmaddwod_d_wu(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwod_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu(<4 x i32> _1:%.*, <8 x i16> _2:%.*, <8 x i16> _3:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4u32 vmaddwod_w_hu(v4u32 _1, v8u16 _2, v8u16 _3) { ++ return __builtin_lsx_vmaddwod_w_hu(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwod_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu(<8 x i16> _1:%.*, <16 x i8> 
_2:%.*, <16 x i8> _3:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8u16 vmaddwod_h_bu(v8u16 _1, v16u8 _2, v16u8 _3) { ++ return __builtin_lsx_vmaddwod_h_bu(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwev_d_wu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu.w(<2 x i64> _1:%.*, <4 x i32> _2:%.*, <4 x i32> _3:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vmaddwev_d_wu_w(v2i64 _1, v4u32 _2, v4i32 _3) { ++ return __builtin_lsx_vmaddwev_d_wu_w(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwev_w_hu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu.h(<4 x i32> _1:%.*, <8 x i16> _2:%.*, <8 x i16> _3:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vmaddwev_w_hu_h(v4i32 _1, v8u16 _2, v8i16 _3) { ++ return __builtin_lsx_vmaddwev_w_hu_h(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwev_h_bu_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu.b(<8 x i16> _1:%.*, <16 x i8> _2:%.*, <16 x i8> _3:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vmaddwev_h_bu_b(v8i16 _1, v16u8 _2, v16i8 _3) { ++ return __builtin_lsx_vmaddwev_h_bu_b(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwod_d_wu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu.w(<2 x i64> _1:%.*, <4 x i32> _2:%.*, <4 x i32> _3:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vmaddwod_d_wu_w(v2i64 _1, v4u32 _2, v4i32 _3) { ++ return __builtin_lsx_vmaddwod_d_wu_w(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwod_w_hu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu.h(<4 x i32> _1:%.*, <8 x i16> _2:%.*, <8 x i16> _3:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vmaddwod_w_hu_h(v4i32 _1, v8u16 _2, v8i16 _3) { ++ return __builtin_lsx_vmaddwod_w_hu_h(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwod_h_bu_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu.b(<8 x i16> _1:%.*, <16 x i8> _2:%.*, <16 x i8> _3:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vmaddwod_h_bu_b(v8i16 _1, v16u8 _2, v16i8 _3) { ++ return __builtin_lsx_vmaddwod_h_bu_b(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwev_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*, <2 x i64> _3:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vmaddwev_q_d(v2i64 _1, v2i64 _2, v2i64 _3) { ++ return __builtin_lsx_vmaddwev_q_d(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwod_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*, <2 x i64> _3:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vmaddwod_q_d(v2i64 _1, v2i64 _2, v2i64 _3) { ++ return __builtin_lsx_vmaddwod_q_d(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwev_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*, <2 x i64> _3:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2u64 vmaddwev_q_du(v2u64 _1, v2u64 _2, v2u64 _3) { ++ return __builtin_lsx_vmaddwev_q_du(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwod_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*, <2 x i64> _3:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2u64 
vmaddwod_q_du(v2u64 _1, v2u64 _2, v2u64 _3) { ++ return __builtin_lsx_vmaddwod_q_du(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwev_q_du_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*, <2 x i64> _3:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vmaddwev_q_du_d(v2i64 _1, v2u64 _2, v2i64 _3) { ++ return __builtin_lsx_vmaddwev_q_du_d(_1, _2, _3); ++} ++// CHECK-LABEL: @vmaddwod_q_du_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*, <2 x i64> _3:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vmaddwod_q_du_d(v2i64 _1, v2u64 _2, v2i64 _3) { ++ return __builtin_lsx_vmaddwod_q_du_d(_1, _2, _3); ++} ++// CHECK-LABEL: @vrotr_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vrotr.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vrotr_b(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vrotr_b(_1, _2); ++} ++// CHECK-LABEL: @vrotr_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vrotr.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vrotr_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vrotr_h(_1, _2); ++} ++// CHECK-LABEL: @vrotr_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vrotr.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vrotr_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vrotr_w(_1, _2); ++} ++// CHECK-LABEL: @vrotr_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vrotr.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vrotr_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vrotr_d(_1, _2); ++} ++// CHECK-LABEL: @vadd_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vadd.q(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vadd_q(v2i64 _1, v2i64 _2) { return __builtin_lsx_vadd_q(_1, _2); } ++// CHECK-LABEL: @vsub_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsub.q(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vsub_q(v2i64 _1, v2i64 _2) { return __builtin_lsx_vsub_q(_1, _2); } ++// CHECK-LABEL: @vldrepl_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vldrepl.b(ptr _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vldrepl_b(void *_1) { return __builtin_lsx_vldrepl_b(_1, 1); } ++// CHECK-LABEL: @vldrepl_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vldrepl.h(ptr _1:%.*, i32 2) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vldrepl_h(void *_1) { return __builtin_lsx_vldrepl_h(_1, 2); } ++// CHECK-LABEL: @vldrepl_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vldrepl.w(ptr _1:%.*, i32 4) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vldrepl_w(void *_1) { return __builtin_lsx_vldrepl_w(_1, 4); } ++// CHECK-LABEL: @vldrepl_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vldrepl.d(ptr _1:%.*, i32 8) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vldrepl_d(void *_1) { return 
__builtin_lsx_vldrepl_d(_1, 8); } ++// CHECK-LABEL: @vmskgez_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmskgez.b(<16 x i8> _1:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vmskgez_b(v16i8 _1) { return __builtin_lsx_vmskgez_b(_1); } ++// CHECK-LABEL: @vmsknz_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmsknz.b(<16 x i8> _1:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vmsknz_b(v16i8 _1) { return __builtin_lsx_vmsknz_b(_1); } ++// CHECK-LABEL: @vexth_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vexth.h.b(<16 x i8> _1:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vexth_h_b(v16i8 _1) { return __builtin_lsx_vexth_h_b(_1); } ++// CHECK-LABEL: @vexth_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vexth.w.h(<8 x i16> _1:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vexth_w_h(v8i16 _1) { return __builtin_lsx_vexth_w_h(_1); } ++// CHECK-LABEL: @vexth_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vexth.d.w(<4 x i32> _1:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vexth_d_w(v4i32 _1) { return __builtin_lsx_vexth_d_w(_1); } ++// CHECK-LABEL: @vexth_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vexth.q.d(<2 x i64> _1:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vexth_q_d(v2i64 _1) { return __builtin_lsx_vexth_q_d(_1); } ++// CHECK-LABEL: @vexth_hu_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vexth.hu.bu(<16 x i8> _1:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8u16 vexth_hu_bu(v16u8 _1) { return __builtin_lsx_vexth_hu_bu(_1); } ++// CHECK-LABEL: @vexth_wu_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vexth.wu.hu(<8 x i16> _1:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4u32 vexth_wu_hu(v8u16 _1) { return __builtin_lsx_vexth_wu_hu(_1); } ++// CHECK-LABEL: @vexth_du_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vexth.du.wu(<4 x i32> _1:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2u64 vexth_du_wu(v4u32 _1) { return __builtin_lsx_vexth_du_wu(_1); } ++// CHECK-LABEL: @vexth_qu_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vexth.qu.du(<2 x i64> _1:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2u64 vexth_qu_du(v2u64 _1) { return __builtin_lsx_vexth_qu_du(_1); } ++// CHECK-LABEL: @vrotri_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vrotri.b(<16 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vrotri_b(v16i8 _1) { return __builtin_lsx_vrotri_b(_1, 1); } ++// CHECK-LABEL: @vrotri_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vrotri.h(<8 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vrotri_h(v8i16 _1) { return __builtin_lsx_vrotri_h(_1, 1); } ++// CHECK-LABEL: @vrotri_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vrotri.w(<4 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vrotri_w(v4i32 _1) { return __builtin_lsx_vrotri_w(_1, 1); } ++// CHECK-LABEL: @vrotri_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> 
@llvm.loongarch.lsx.vrotri.d(<2 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vrotri_d(v2i64 _1) { return __builtin_lsx_vrotri_d(_1, 1); } ++// CHECK-LABEL: @vextl_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vextl.q.d(<2 x i64> _1:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vextl_q_d(v2i64 _1) { return __builtin_lsx_vextl_q_d(_1); } ++// CHECK-LABEL: @vsrlni_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h(<16 x i8> _1:%.*, <16 x i8> _2:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vsrlni_b_h(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vsrlni_b_h(_1, _2, 1); ++} ++// CHECK-LABEL: @vsrlni_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsrlni.h.w(<8 x i16> _1:%.*, <8 x i16> _2:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vsrlni_h_w(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vsrlni_h_w(_1, _2, 1); ++} ++// CHECK-LABEL: @vsrlni_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsrlni.w.d(<4 x i32> _1:%.*, <4 x i32> _2:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vsrlni_w_d(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vsrlni_w_d(_1, _2, 1); ++} ++// CHECK-LABEL: @vsrlni_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsrlni.d.q(<2 x i64> _1:%.*, <2 x i64> _2:%.*, i32 1) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vsrlni_d_q(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vsrlni_d_q(_1, _2, 1); ++} ++// CHECK-LABEL: @vsrlrni_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h(<16 x i8> _1:%.*, <16 x i8> _2:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vsrlrni_b_h(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vsrlrni_b_h(_1, _2, 1); ++} ++// CHECK-LABEL: @vsrlrni_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsrlrni.h.w(<8 x i16> _1:%.*, <8 x i16> _2:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vsrlrni_h_w(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vsrlrni_h_w(_1, _2, 1); ++} ++// CHECK-LABEL: @vsrlrni_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d(<4 x i32> _1:%.*, <4 x i32> _2:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vsrlrni_w_d(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vsrlrni_w_d(_1, _2, 1); ++} ++// CHECK-LABEL: @vsrlrni_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsrlrni.d.q(<2 x i64> _1:%.*, <2 x i64> _2:%.*, i32 1) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vsrlrni_d_q(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vsrlrni_d_q(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrlni_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h(<16 x i8> _1:%.*, <16 x i8> _2:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vssrlni_b_h(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vssrlni_b_h(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrlni_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssrlni.h.w(<8 x i16> _1:%.*, <8 x i16> _2:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vssrlni_h_w(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vssrlni_h_w(_1, _2, 1); ++} ++// 
CHECK-LABEL: @vssrlni_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssrlni.w.d(<4 x i32> _1:%.*, <4 x i32> _2:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vssrlni_w_d(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vssrlni_w_d(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrlni_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vssrlni.d.q(<2 x i64> _1:%.*, <2 x i64> _2:%.*, i32 1) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vssrlni_d_q(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vssrlni_d_q(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrlni_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssrlni.bu.h(<16 x i8> _1:%.*, <16 x i8> _2:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16u8 vssrlni_bu_h(v16u8 _1, v16i8 _2) { ++ return __builtin_lsx_vssrlni_bu_h(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrlni_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssrlni.hu.w(<8 x i16> _1:%.*, <8 x i16> _2:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8u16 vssrlni_hu_w(v8u16 _1, v8i16 _2) { ++ return __builtin_lsx_vssrlni_hu_w(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrlni_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssrlni.wu.d(<4 x i32> _1:%.*, <4 x i32> _2:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4u32 vssrlni_wu_d(v4u32 _1, v4i32 _2) { ++ return __builtin_lsx_vssrlni_wu_d(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrlni_du_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vssrlni.du.q(<2 x i64> _1:%.*, <2 x i64> _2:%.*, i32 1) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2u64 vssrlni_du_q(v2u64 _1, v2i64 _2) { ++ return __builtin_lsx_vssrlni_du_q(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrlrni_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h(<16 x i8> _1:%.*, <16 x i8> _2:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vssrlrni_b_h(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vssrlrni_b_h(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrlrni_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrni.h.w(<8 x i16> _1:%.*, <8 x i16> _2:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vssrlrni_h_w(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vssrlrni_h_w(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrlrni_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrni.w.d(<4 x i32> _1:%.*, <4 x i32> _2:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vssrlrni_w_d(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vssrlrni_w_d(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrlrni_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vssrlrni.d.q(<2 x i64> _1:%.*, <2 x i64> _2:%.*, i32 1) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vssrlrni_d_q(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vssrlrni_d_q(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrlrni_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrni.bu.h(<16 x i8> _1:%.*, <16 x i8> _2:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16u8 vssrlrni_bu_h(v16u8 _1, v16i8 _2) { ++ return __builtin_lsx_vssrlrni_bu_h(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrlrni_hu_w( ++// CHECK-NEXT: entry: ++// 
CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrni.hu.w(<8 x i16> _1:%.*, <8 x i16> _2:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8u16 vssrlrni_hu_w(v8u16 _1, v8i16 _2) { ++ return __builtin_lsx_vssrlrni_hu_w(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrlrni_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrni.wu.d(<4 x i32> _1:%.*, <4 x i32> _2:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4u32 vssrlrni_wu_d(v4u32 _1, v4i32 _2) { ++ return __builtin_lsx_vssrlrni_wu_d(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrlrni_du_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vssrlrni.du.q(<2 x i64> _1:%.*, <2 x i64> _2:%.*, i32 1) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2u64 vssrlrni_du_q(v2u64 _1, v2i64 _2) { ++ return __builtin_lsx_vssrlrni_du_q(_1, _2, 1); ++} ++// CHECK-LABEL: @vsrani_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsrani.b.h(<16 x i8> _1:%.*, <16 x i8> _2:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vsrani_b_h(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vsrani_b_h(_1, _2, 1); ++} ++// CHECK-LABEL: @vsrani_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsrani.h.w(<8 x i16> _1:%.*, <8 x i16> _2:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vsrani_h_w(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vsrani_h_w(_1, _2, 1); ++} ++// CHECK-LABEL: @vsrani_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsrani.w.d(<4 x i32> _1:%.*, <4 x i32> _2:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vsrani_w_d(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vsrani_w_d(_1, _2, 1); ++} ++// CHECK-LABEL: @vsrani_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsrani.d.q(<2 x i64> _1:%.*, <2 x i64> _2:%.*, i32 1) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vsrani_d_q(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vsrani_d_q(_1, _2, 1); ++} ++// CHECK-LABEL: @vsrarni_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h(<16 x i8> _1:%.*, <16 x i8> _2:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vsrarni_b_h(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vsrarni_b_h(_1, _2, 1); ++} ++// CHECK-LABEL: @vsrarni_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsrarni.h.w(<8 x i16> _1:%.*, <8 x i16> _2:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vsrarni_h_w(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vsrarni_h_w(_1, _2, 1); ++} ++// CHECK-LABEL: @vsrarni_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsrarni.w.d(<4 x i32> _1:%.*, <4 x i32> _2:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vsrarni_w_d(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vsrarni_w_d(_1, _2, 1); ++} ++// CHECK-LABEL: @vsrarni_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsrarni.d.q(<2 x i64> _1:%.*, <2 x i64> _2:%.*, i32 1) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vsrarni_d_q(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vsrarni_d_q(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrani_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssrani.b.h(<16 x i8> _1:%.*, 
<16 x i8> _2:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vssrani_b_h(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vssrani_b_h(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrani_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssrani.h.w(<8 x i16> _1:%.*, <8 x i16> _2:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vssrani_h_w(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vssrani_h_w(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrani_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssrani.w.d(<4 x i32> _1:%.*, <4 x i32> _2:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vssrani_w_d(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vssrani_w_d(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrani_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vssrani.d.q(<2 x i64> _1:%.*, <2 x i64> _2:%.*, i32 1) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vssrani_d_q(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vssrani_d_q(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrani_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssrani.bu.h(<16 x i8> _1:%.*, <16 x i8> _2:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16u8 vssrani_bu_h(v16u8 _1, v16i8 _2) { ++ return __builtin_lsx_vssrani_bu_h(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrani_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssrani.hu.w(<8 x i16> _1:%.*, <8 x i16> _2:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8u16 vssrani_hu_w(v8u16 _1, v8i16 _2) { ++ return __builtin_lsx_vssrani_hu_w(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrani_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssrani.wu.d(<4 x i32> _1:%.*, <4 x i32> _2:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4u32 vssrani_wu_d(v4u32 _1, v4i32 _2) { ++ return __builtin_lsx_vssrani_wu_d(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrani_du_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vssrani.du.q(<2 x i64> _1:%.*, <2 x i64> _2:%.*, i32 1) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2u64 vssrani_du_q(v2u64 _1, v2i64 _2) { ++ return __builtin_lsx_vssrani_du_q(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrarni_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h(<16 x i8> _1:%.*, <16 x i8> _2:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vssrarni_b_h(v16i8 _1, v16i8 _2) { ++ return __builtin_lsx_vssrarni_b_h(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrarni_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssrarni.h.w(<8 x i16> _1:%.*, <8 x i16> _2:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vssrarni_h_w(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vssrarni_h_w(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrarni_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssrarni.w.d(<4 x i32> _1:%.*, <4 x i32> _2:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vssrarni_w_d(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vssrarni_w_d(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrarni_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vssrarni.d.q(<2 x i64> _1:%.*, <2 x i64> _2:%.*, i32 1) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// 
++v2i64 vssrarni_d_q(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vssrarni_d_q(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrarni_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssrarni.bu.h(<16 x i8> _1:%.*, <16 x i8> _2:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16u8 vssrarni_bu_h(v16u8 _1, v16i8 _2) { ++ return __builtin_lsx_vssrarni_bu_h(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrarni_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssrarni.hu.w(<8 x i16> _1:%.*, <8 x i16> _2:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8u16 vssrarni_hu_w(v8u16 _1, v8i16 _2) { ++ return __builtin_lsx_vssrarni_hu_w(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrarni_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssrarni.wu.d(<4 x i32> _1:%.*, <4 x i32> _2:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4u32 vssrarni_wu_d(v4u32 _1, v4i32 _2) { ++ return __builtin_lsx_vssrarni_wu_d(_1, _2, 1); ++} ++// CHECK-LABEL: @vssrarni_du_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vssrarni.du.q(<2 x i64> _1:%.*, <2 x i64> _2:%.*, i32 1) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2u64 vssrarni_du_q(v2u64 _1, v2i64 _2) { ++ return __builtin_lsx_vssrarni_du_q(_1, _2, 1); ++} ++// CHECK-LABEL: @vpermi_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vpermi.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vpermi_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vpermi_w(_1, _2, 1); ++} ++// CHECK-LABEL: @vld( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vld(ptr _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vld(void *_1) { return __builtin_lsx_vld(_1, 1); } ++// CHECK-LABEL: @vst( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vst(<16 x i8> _1:%.*, ptr _2:%.*, i32 1) ++// CHECK-NEXT: ret void ++// ++void vst(v16i8 _1, void *_2) { return __builtin_lsx_vst(_1, _2, 1); } ++// CHECK-LABEL: @vssrlrn_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrn.b.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vssrlrn_b_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vssrlrn_b_h(_1, _2); ++} ++// CHECK-LABEL: @vssrlrn_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrn.h.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vssrlrn_h_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vssrlrn_h_w(_1, _2); ++} ++// CHECK-LABEL: @vssrlrn_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrn.w.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vssrlrn_w_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vssrlrn_w_d(_1, _2); ++} ++// CHECK-LABEL: @vssrln_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssrln.b.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vssrln_b_h(v8i16 _1, v8i16 _2) { ++ return __builtin_lsx_vssrln_b_h(_1, _2); ++} ++// CHECK-LABEL: @vssrln_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssrln.h.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) 
++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vssrln_h_w(v4i32 _1, v4i32 _2) { ++ return __builtin_lsx_vssrln_h_w(_1, _2); ++} ++// CHECK-LABEL: @vssrln_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssrln.w.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vssrln_w_d(v2i64 _1, v2i64 _2) { ++ return __builtin_lsx_vssrln_w_d(_1, _2); ++} ++// CHECK-LABEL: @vorn_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vorn.v(<16 x i8> _1:%.*, <16 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vorn_v(v16i8 _1, v16i8 _2) { return __builtin_lsx_vorn_v(_1, _2); } ++// CHECK-LABEL: @vldi( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vldi(i32 1) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vldi() { return __builtin_lsx_vldi(1); } ++// CHECK-LABEL: @vshuf_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vshuf.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*, <16 x i8> _3:%.*) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vshuf_b(v16i8 _1, v16i8 _2, v16i8 _3) { ++ return __builtin_lsx_vshuf_b(_1, _2, _3); ++} ++// CHECK-LABEL: @vldx( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vldx(ptr _1:%.*, i64 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vldx(void *_1) { return __builtin_lsx_vldx(_1, 1); } ++// CHECK-LABEL: @vstx( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstx(<16 x i8> _1:%.*, ptr _2:%.*, i64 1) ++// CHECK-NEXT: ret void ++// ++void vstx(v16i8 _1, void *_2) { return __builtin_lsx_vstx(_1, _2, 1); } ++// CHECK-LABEL: @vextl_qu_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vextl.qu.du(<2 x i64> _1:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2u64 vextl_qu_du(v2u64 _1) { return __builtin_lsx_vextl_qu_du(_1); } ++// CHECK-LABEL: @bnz_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lsx.bnz.b(<16 x i8> _1:%.*) ++// CHECK-NEXT: ret i32 TMP0 ++// ++int bnz_b(v16u8 _1) { return __builtin_lsx_bnz_b(_1); } ++// CHECK-LABEL: @bnz_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lsx.bnz.d(<2 x i64> _1:%.*) ++// CHECK-NEXT: ret i32 TMP0 ++// ++int bnz_d(v2u64 _1) { return __builtin_lsx_bnz_d(_1); } ++// CHECK-LABEL: @bnz_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lsx.bnz.h(<8 x i16> _1:%.*) ++// CHECK-NEXT: ret i32 TMP0 ++// ++int bnz_h(v8u16 _1) { return __builtin_lsx_bnz_h(_1); } ++// CHECK-LABEL: @bnz_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lsx.bnz.v(<16 x i8> _1:%.*) ++// CHECK-NEXT: ret i32 TMP0 ++// ++int bnz_v(v16u8 _1) { return __builtin_lsx_bnz_v(_1); } ++// CHECK-LABEL: @bnz_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lsx.bnz.w(<4 x i32> _1:%.*) ++// CHECK-NEXT: ret i32 TMP0 ++// ++int bnz_w(v4u32 _1) { return __builtin_lsx_bnz_w(_1); } ++// CHECK-LABEL: @bz_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lsx.bz.b(<16 x i8> _1:%.*) ++// CHECK-NEXT: ret i32 TMP0 ++// ++int bz_b(v16u8 _1) { return __builtin_lsx_bz_b(_1); } ++// CHECK-LABEL: @bz_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lsx.bz.d(<2 x i64> _1:%.*) ++// CHECK-NEXT: ret i32 TMP0 ++// ++int 
bz_d(v2u64 _1) { return __builtin_lsx_bz_d(_1); } ++// CHECK-LABEL: @bz_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lsx.bz.h(<8 x i16> _1:%.*) ++// CHECK-NEXT: ret i32 TMP0 ++// ++int bz_h(v8u16 _1) { return __builtin_lsx_bz_h(_1); } ++// CHECK-LABEL: @bz_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lsx.bz.v(<16 x i8> _1:%.*) ++// CHECK-NEXT: ret i32 TMP0 ++// ++int bz_v(v16u8 _1) { return __builtin_lsx_bz_v(_1); } ++// CHECK-LABEL: @bz_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lsx.bz.w(<4 x i32> _1:%.*) ++// CHECK-NEXT: ret i32 TMP0 ++// ++int bz_w(v4u32 _1) { return __builtin_lsx_bz_w(_1); } ++// CHECK-LABEL: @vfcmp_caf_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.caf.d(<2 x double> _1:%.*, <2 x double> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vfcmp_caf_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfcmp_caf_d(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_caf_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.caf.s(<4 x float> _1:%.*, <4 x float> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vfcmp_caf_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfcmp_caf_s(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_ceq_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.ceq.d(<2 x double> _1:%.*, <2 x double> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vfcmp_ceq_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfcmp_ceq_d(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_ceq_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.ceq.s(<4 x float> _1:%.*, <4 x float> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vfcmp_ceq_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfcmp_ceq_s(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_cle_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cle.d(<2 x double> _1:%.*, <2 x double> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vfcmp_cle_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfcmp_cle_d(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_cle_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cle.s(<4 x float> _1:%.*, <4 x float> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vfcmp_cle_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfcmp_cle_s(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_clt_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.clt.d(<2 x double> _1:%.*, <2 x double> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vfcmp_clt_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfcmp_clt_d(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_clt_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.clt.s(<4 x float> _1:%.*, <4 x float> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vfcmp_clt_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfcmp_clt_s(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_cne_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cne.d(<2 x double> _1:%.*, <2 x double> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vfcmp_cne_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfcmp_cne_d(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_cne_s( ++// 
CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cne.s(<4 x float> _1:%.*, <4 x float> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vfcmp_cne_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfcmp_cne_s(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_cor_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cor.d(<2 x double> _1:%.*, <2 x double> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vfcmp_cor_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfcmp_cor_d(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_cor_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cor.s(<4 x float> _1:%.*, <4 x float> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vfcmp_cor_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfcmp_cor_s(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_cueq_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cueq.d(<2 x double> _1:%.*, <2 x double> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vfcmp_cueq_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfcmp_cueq_d(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_cueq_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cueq.s(<4 x float> _1:%.*, <4 x float> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vfcmp_cueq_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfcmp_cueq_s(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_cule_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cule.d(<2 x double> _1:%.*, <2 x double> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vfcmp_cule_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfcmp_cule_d(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_cule_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cule.s(<4 x float> _1:%.*, <4 x float> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vfcmp_cule_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfcmp_cule_s(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_cult_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cult.d(<2 x double> _1:%.*, <2 x double> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vfcmp_cult_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfcmp_cult_d(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_cult_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cult.s(<4 x float> _1:%.*, <4 x float> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vfcmp_cult_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfcmp_cult_s(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_cun_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cun.d(<2 x double> _1:%.*, <2 x double> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vfcmp_cun_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfcmp_cun_d(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_cune_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cune.d(<2 x double> _1:%.*, <2 x double> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vfcmp_cune_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfcmp_cune_d(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_cune_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cune.s(<4 x float> _1:%.*, <4 x 
float> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vfcmp_cune_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfcmp_cune_s(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_cun_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cun.s(<4 x float> _1:%.*, <4 x float> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vfcmp_cun_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfcmp_cun_s(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_saf_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.saf.d(<2 x double> _1:%.*, <2 x double> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vfcmp_saf_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfcmp_saf_d(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_saf_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.saf.s(<4 x float> _1:%.*, <4 x float> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vfcmp_saf_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfcmp_saf_s(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_seq_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.seq.d(<2 x double> _1:%.*, <2 x double> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vfcmp_seq_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfcmp_seq_d(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_seq_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.seq.s(<4 x float> _1:%.*, <4 x float> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vfcmp_seq_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfcmp_seq_s(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_sle_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sle.d(<2 x double> _1:%.*, <2 x double> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vfcmp_sle_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfcmp_sle_d(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_sle_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sle.s(<4 x float> _1:%.*, <4 x float> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vfcmp_sle_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfcmp_sle_s(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_slt_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.slt.d(<2 x double> _1:%.*, <2 x double> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vfcmp_slt_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfcmp_slt_d(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_slt_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.slt.s(<4 x float> _1:%.*, <4 x float> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vfcmp_slt_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfcmp_slt_s(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_sne_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sne.d(<2 x double> _1:%.*, <2 x double> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vfcmp_sne_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfcmp_sne_d(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_sne_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sne.s(<4 x float> _1:%.*, <4 x float> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vfcmp_sne_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfcmp_sne_s(_1, _2); ++} ++// 
CHECK-LABEL: @vfcmp_sor_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sor.d(<2 x double> _1:%.*, <2 x double> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vfcmp_sor_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfcmp_sor_d(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_sor_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sor.s(<4 x float> _1:%.*, <4 x float> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vfcmp_sor_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfcmp_sor_s(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_sueq_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sueq.d(<2 x double> _1:%.*, <2 x double> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vfcmp_sueq_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfcmp_sueq_d(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_sueq_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sueq.s(<4 x float> _1:%.*, <4 x float> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vfcmp_sueq_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfcmp_sueq_s(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_sule_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sule.d(<2 x double> _1:%.*, <2 x double> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vfcmp_sule_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfcmp_sule_d(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_sule_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sule.s(<4 x float> _1:%.*, <4 x float> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vfcmp_sule_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfcmp_sule_s(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_sult_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sult.d(<2 x double> _1:%.*, <2 x double> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vfcmp_sult_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfcmp_sult_d(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_sult_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sult.s(<4 x float> _1:%.*, <4 x float> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vfcmp_sult_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfcmp_sult_s(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_sun_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sun.d(<2 x double> _1:%.*, <2 x double> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vfcmp_sun_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfcmp_sun_d(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_sune_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sune.d(<2 x double> _1:%.*, <2 x double> _2:%.*) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vfcmp_sune_d(v2f64 _1, v2f64 _2) { ++ return __builtin_lsx_vfcmp_sune_d(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_sune_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sune.s(<4 x float> _1:%.*, <4 x float> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vfcmp_sune_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfcmp_sune_s(_1, _2); ++} ++// CHECK-LABEL: @vfcmp_sun_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> 
@llvm.loongarch.lsx.vfcmp.sun.s(<4 x float> _1:%.*, <4 x float> _2:%.*) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vfcmp_sun_s(v4f32 _1, v4f32 _2) { ++ return __builtin_lsx_vfcmp_sun_s(_1, _2); ++} ++// CHECK-LABEL: @vrepli_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vrepli.b(i32 1) ++// CHECK-NEXT: ret <16 x i8> TMP0 ++// ++v16i8 vrepli_b() { return __builtin_lsx_vrepli_b(1); } ++// CHECK-LABEL: @vrepli_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vrepli.d(i32 1) ++// CHECK-NEXT: ret <2 x i64> TMP0 ++// ++v2i64 vrepli_d() { return __builtin_lsx_vrepli_d(1); } ++// CHECK-LABEL: @vrepli_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vrepli.h(i32 1) ++// CHECK-NEXT: ret <8 x i16> TMP0 ++// ++v8i16 vrepli_h() { return __builtin_lsx_vrepli_h(1); } ++// CHECK-LABEL: @vrepli_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vrepli.w(i32 1) ++// CHECK-NEXT: ret <4 x i32> TMP0 ++// ++v4i32 vrepli_w() { return __builtin_lsx_vrepli_w(1); } +-- +2.20.1 + + +From c403023799745d743eb3aa8d8719318c58e395d4 Mon Sep 17 00:00:00 2001 +From: chenli <chenli@loongson.cn> +Date: Fri, 27 Oct 2023 15:58:55 +0800 +Subject: PATCH 7/8 LoongArchCodeGen Add LASX builtin testcases + +(cherry picked from commit 535408eedbf812d9038bd40a0faae5001d2256cf) +--- + .../LoongArch/lasx/builtin-alias-error.c | 1373 +++++ + .../CodeGen/LoongArch/lasx/builtin-alias.c | 4430 ++++++++++++++++ + .../CodeGen/LoongArch/lasx/builtin-error.c | 1392 ++++++ + clang/test/CodeGen/LoongArch/lasx/builtin.c | 4452 +++++++++++++++++ + 4 files changed, 11647 insertions(+) + create mode 100644 clang/test/CodeGen/LoongArch/lasx/builtin-alias-error.c + create mode 100644 clang/test/CodeGen/LoongArch/lasx/builtin-alias.c + create mode 100644 clang/test/CodeGen/LoongArch/lasx/builtin-error.c + create mode 100644 clang/test/CodeGen/LoongArch/lasx/builtin.c + +diff --git a/clang/test/CodeGen/LoongArch/lasx/builtin-alias-error.c b/clang/test/CodeGen/LoongArch/lasx/builtin-alias-error.c +new file mode 100644 +index 000000000000..2a3862bbe3c1 +--- /dev/null ++++ b/clang/test/CodeGen/LoongArch/lasx/builtin-alias-error.c +@@ -0,0 +1,1373 @@ ++// RUN: %clang_cc1 -triple loongarch64 -target-feature +lasx -verify %s ++ ++#include <lasxintrin.h> ++ ++v32i8 xvslli_b(v32i8 _1, int var) { ++ v32i8 res = __lasx_xvslli_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 7}} ++ res |= __lasx_xvslli_b(_1, 8); // expected-error {{argument value 8 is outside the valid range 0, 7}} ++ res |= __lasx_xvslli_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvslli_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvslli_h(v16i16 _1, int var) { ++ v16i16 res = __lasx_xvslli_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __lasx_xvslli_h(_1, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __lasx_xvslli_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvslli_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvslli_w(v8i32 _1, int var) { ++ v8i32 res = __lasx_xvslli_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lasx_xvslli_w(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lasx_xvslli_w(_1, var); // 
expected-error {{argument to '__builtin_lasx_xvslli_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvslli_d(v4i64 _1, int var) { ++ v4i64 res = __lasx_xvslli_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= __lasx_xvslli_d(_1, 64); // expected-error {{argument value 64 is outside the valid range 0, 63}} ++ res |= __lasx_xvslli_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvslli_d' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvsrai_b(v32i8 _1, int var) { ++ v32i8 res = __lasx_xvsrai_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 7}} ++ res |= __lasx_xvsrai_b(_1, 8); // expected-error {{argument value 8 is outside the valid range 0, 7}} ++ res |= __lasx_xvsrai_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrai_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvsrai_h(v16i16 _1, int var) { ++ v16i16 res = __lasx_xvsrai_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __lasx_xvsrai_h(_1, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __lasx_xvsrai_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrai_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvsrai_w(v8i32 _1, int var) { ++ v8i32 res = __lasx_xvsrai_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lasx_xvsrai_w(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lasx_xvsrai_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrai_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvsrai_d(v4i64 _1, int var) { ++ v4i64 res = __lasx_xvsrai_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= __lasx_xvsrai_d(_1, 64); // expected-error {{argument value 64 is outside the valid range 0, 63}} ++ res |= __lasx_xvsrai_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrai_d' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvsrari_b(v32i8 _1, int var) { ++ v32i8 res = __lasx_xvsrari_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 7}} ++ res |= __lasx_xvsrari_b(_1, 8); // expected-error {{argument value 8 is outside the valid range 0, 7}} ++ res |= __lasx_xvsrari_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrari_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvsrari_h(v16i16 _1, int var) { ++ v16i16 res = __lasx_xvsrari_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __lasx_xvsrari_h(_1, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __lasx_xvsrari_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrari_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvsrari_w(v8i32 _1, int var) { ++ v8i32 res = __lasx_xvsrari_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lasx_xvsrari_w(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lasx_xvsrari_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrari_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvsrari_d(v4i64 _1, int var) { ++ v4i64 res = __lasx_xvsrari_d(_1, -1); // expected-error {{argument value 4294967295 is outside 
the valid range 0, 63}} ++ res |= __lasx_xvsrari_d(_1, 64); // expected-error {{argument value 64 is outside the valid range 0, 63}} ++ res |= __lasx_xvsrari_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrari_d' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvsrli_b(v32i8 _1, int var) { ++ v32i8 res = __lasx_xvsrli_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 7}} ++ res |= __lasx_xvsrli_b(_1, 8); // expected-error {{argument value 8 is outside the valid range 0, 7}} ++ res |= __lasx_xvsrli_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrli_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvsrli_h(v16i16 _1, int var) { ++ v16i16 res = __lasx_xvsrli_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __lasx_xvsrli_h(_1, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __lasx_xvsrli_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrli_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvsrli_w(v8i32 _1, int var) { ++ v8i32 res = __lasx_xvsrli_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lasx_xvsrli_w(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lasx_xvsrli_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrli_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvsrli_d(v4i64 _1, int var) { ++ v4i64 res = __lasx_xvsrli_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= __lasx_xvsrli_d(_1, 64); // expected-error {{argument value 64 is outside the valid range 0, 63}} ++ res |= __lasx_xvsrli_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrli_d' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvsrlri_b(v32i8 _1, int var) { ++ v32i8 res = __lasx_xvsrlri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 7}} ++ res |= __lasx_xvsrlri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range 0, 7}} ++ res |= __lasx_xvsrlri_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrlri_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvsrlri_h(v16i16 _1, int var) { ++ v16i16 res = __lasx_xvsrlri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __lasx_xvsrlri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __lasx_xvsrlri_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrlri_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvsrlri_w(v8i32 _1, int var) { ++ v8i32 res = __lasx_xvsrlri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lasx_xvsrlri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lasx_xvsrlri_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrlri_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvsrlri_d(v4i64 _1, int var) { ++ v4i64 res = __lasx_xvsrlri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= __lasx_xvsrlri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range 0, 63}} ++ res |= __lasx_xvsrlri_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrlri_d' must be a 
constant integer}} ++ return res; ++} ++ ++v32u8 xvbitclri_b(v32u8 _1, int var) { ++ v32u8 res = __lasx_xvbitclri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 7}} ++ res |= __lasx_xvbitclri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range 0, 7}} ++ res |= __lasx_xvbitclri_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitclri_b' must be a constant integer}} ++ return res; ++} ++ ++v16u16 xvbitclri_h(v16u16 _1, int var) { ++ v16u16 res = __lasx_xvbitclri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __lasx_xvbitclri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __lasx_xvbitclri_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitclri_h' must be a constant integer}} ++ return res; ++} ++ ++v8u32 xvbitclri_w(v8u32 _1, int var) { ++ v8u32 res = __lasx_xvbitclri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lasx_xvbitclri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lasx_xvbitclri_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitclri_w' must be a constant integer}} ++ return res; ++} ++ ++v4u64 xvbitclri_d(v4u64 _1, int var) { ++ v4u64 res = __lasx_xvbitclri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= __lasx_xvbitclri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range 0, 63}} ++ res |= __lasx_xvbitclri_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitclri_d' must be a constant integer}} ++ return res; ++} ++ ++v32u8 xvbitseti_b(v32u8 _1, int var) { ++ v32u8 res = __lasx_xvbitseti_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 7}} ++ res |= __lasx_xvbitseti_b(_1, 8); // expected-error {{argument value 8 is outside the valid range 0, 7}} ++ res |= __lasx_xvbitseti_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitseti_b' must be a constant integer}} ++ return res; ++} ++ ++v16u16 xvbitseti_h(v16u16 _1, int var) { ++ v16u16 res = __lasx_xvbitseti_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __lasx_xvbitseti_h(_1, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __lasx_xvbitseti_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitseti_h' must be a constant integer}} ++ return res; ++} ++ ++v8u32 xvbitseti_w(v8u32 _1, int var) { ++ v8u32 res = __lasx_xvbitseti_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lasx_xvbitseti_w(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lasx_xvbitseti_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitseti_w' must be a constant integer}} ++ return res; ++} ++ ++v4u64 xvbitseti_d(v4u64 _1, int var) { ++ v4u64 res = __lasx_xvbitseti_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= __lasx_xvbitseti_d(_1, 64); // expected-error {{argument value 64 is outside the valid range 0, 63}} ++ res |= __lasx_xvbitseti_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitseti_d' must be a constant integer}} ++ return res; ++} ++ ++v32u8 xvbitrevi_b(v32u8 _1, int var) { ++ v32u8 res = __lasx_xvbitrevi_b(_1, -1); // 
expected-error {{argument value 4294967295 is outside the valid range 0, 7}} ++ res |= __lasx_xvbitrevi_b(_1, 8); // expected-error {{argument value 8 is outside the valid range 0, 7}} ++ res |= __lasx_xvbitrevi_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitrevi_b' must be a constant integer}} ++ return res; ++} ++ ++v16u16 xvbitrevi_h(v16u16 _1, int var) { ++ v16u16 res = __lasx_xvbitrevi_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __lasx_xvbitrevi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __lasx_xvbitrevi_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitrevi_h' must be a constant integer}} ++ return res; ++} ++ ++v8u32 xvbitrevi_w(v8u32 _1, int var) { ++ v8u32 res = __lasx_xvbitrevi_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lasx_xvbitrevi_w(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lasx_xvbitrevi_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitrevi_w' must be a constant integer}} ++ return res; ++} ++ ++v4u64 xvbitrevi_d(v4u64 _1, int var) { ++ v4u64 res = __lasx_xvbitrevi_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= __lasx_xvbitrevi_d(_1, 64); // expected-error {{argument value 64 is outside the valid range 0, 63}} ++ res |= __lasx_xvbitrevi_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitrevi_d' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvaddi_bu(v32i8 _1, int var) { ++ v32i8 res = __lasx_xvaddi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lasx_xvaddi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lasx_xvaddi_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvaddi_bu' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvaddi_hu(v16i16 _1, int var) { ++ v16i16 res = __lasx_xvaddi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lasx_xvaddi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lasx_xvaddi_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvaddi_hu' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvaddi_wu(v8i32 _1, int var) { ++ v8i32 res = __lasx_xvaddi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lasx_xvaddi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lasx_xvaddi_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvaddi_wu' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvaddi_du(v4i64 _1, int var) { ++ v4i64 res = __lasx_xvaddi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lasx_xvaddi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lasx_xvaddi_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvaddi_du' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvsubi_bu(v32i8 _1, int var) { ++ v32i8 res = __lasx_xvsubi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lasx_xvsubi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range 
0, 31}} ++ res |= __lasx_xvsubi_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsubi_bu' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvsubi_hu(v16i16 _1, int var) { ++ v16i16 res = __lasx_xvsubi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lasx_xvsubi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lasx_xvsubi_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsubi_hu' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvsubi_wu(v8i32 _1, int var) { ++ v8i32 res = __lasx_xvsubi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lasx_xvsubi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lasx_xvsubi_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsubi_wu' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvsubi_du(v4i64 _1, int var) { ++ v4i64 res = __lasx_xvsubi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lasx_xvsubi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lasx_xvsubi_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvsubi_du' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvmaxi_b(v32i8 _1, int var) { ++ v32i8 res = __lasx_xvmaxi_b(_1, -17); // expected-error {{argument value -17 is outside the valid range -16, 15}} ++ res |= __lasx_xvmaxi_b(_1, 16); // expected-error {{argument value 16 is outside the valid range -16, 15}} ++ res |= __lasx_xvmaxi_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvmaxi_h(v16i16 _1, int var) { ++ v16i16 res = __lasx_xvmaxi_h(_1, -17); // expected-error {{argument value -17 is outside the valid range -16, 15}} ++ res |= __lasx_xvmaxi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range -16, 15}} ++ res |= __lasx_xvmaxi_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvmaxi_w(v8i32 _1, int var) { ++ v8i32 res = __lasx_xvmaxi_w(_1, -17); // expected-error {{argument value -17 is outside the valid range -16, 15}} ++ res |= __lasx_xvmaxi_w(_1, 16); // expected-error {{argument value 16 is outside the valid range -16, 15}} ++ res |= __lasx_xvmaxi_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvmaxi_d(v4i64 _1, int var) { ++ v4i64 res = __lasx_xvmaxi_d(_1, -17); // expected-error {{argument value -17 is outside the valid range -16, 15}} ++ res |= __lasx_xvmaxi_d(_1, 16); // expected-error {{argument value 16 is outside the valid range -16, 15}} ++ res |= __lasx_xvmaxi_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_d' must be a constant integer}} ++ return res; ++} ++ ++v32u8 xvmaxi_bu(v32u8 _1, int var) { ++ v32u8 res = __lasx_xvmaxi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lasx_xvmaxi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lasx_xvmaxi_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_bu' must be a constant integer}} ++ return res; ++} ++ ++v16u16 xvmaxi_hu(v16u16 _1, int var) { ++ v16u16 res = __lasx_xvmaxi_hu(_1, -1); // 
expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lasx_xvmaxi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lasx_xvmaxi_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_hu' must be a constant integer}} ++ return res; ++} ++ ++v8u32 xvmaxi_wu(v8u32 _1, int var) { ++ v8u32 res = __lasx_xvmaxi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lasx_xvmaxi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lasx_xvmaxi_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_wu' must be a constant integer}} ++ return res; ++} ++ ++v4u64 xvmaxi_du(v4u64 _1, int var) { ++ v4u64 res = __lasx_xvmaxi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lasx_xvmaxi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lasx_xvmaxi_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_du' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvmini_b(v32i8 _1, int var) { ++ v32i8 res = __lasx_xvmini_b(_1, -17); // expected-error {{argument value -17 is outside the valid range -16, 15}} ++ res |= __lasx_xvmini_b(_1, 16); // expected-error {{argument value 16 is outside the valid range -16, 15}} ++ res |= __lasx_xvmini_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvmini_h(v16i16 _1, int var) { ++ v16i16 res = __lasx_xvmini_h(_1, -17); // expected-error {{argument value -17 is outside the valid range -16, 15}} ++ res |= __lasx_xvmini_h(_1, 16); // expected-error {{argument value 16 is outside the valid range -16, 15}} ++ res |= __lasx_xvmini_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_h' must be a constant integer}}} ++ return res; ++} ++ ++v8i32 xvmini_w(v8i32 _1, int var) { ++ v8i32 res = __lasx_xvmini_w(_1, -17); // expected-error {{argument value -17 is outside the valid range -16, 15}} ++ res |= __lasx_xvmini_w(_1, 16); // expected-error {{argument value 16 is outside the valid range -16, 15}} ++ res |= __lasx_xvmini_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvmini_d(v4i64 _1, int var) { ++ v4i64 res = __lasx_xvmini_d(_1, -17); // expected-error {{argument value -17 is outside the valid range -16, 15}} ++ res |= __lasx_xvmini_d(_1, 16); // expected-error {{argument value 16 is outside the valid range -16, 15}} ++ res |= __lasx_xvmini_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_d' must be a constant integer}} ++ return res; ++} ++ ++v32u8 xvmini_bu(v32u8 _1, int var) { ++ v32u8 res = __lasx_xvmini_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lasx_xvmini_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lasx_xvmini_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_bu' must be a constant integer}} ++ return res; ++} ++ ++v16u16 xvmini_hu(v16u16 _1, int var) { ++ v16u16 res = __lasx_xvmini_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lasx_xvmini_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lasx_xvmini_hu(_1, var); // 
expected-error {{argument to '__builtin_lasx_xvmini_hu' must be a constant integer}} ++ return res; ++} ++ ++v8u32 xvmini_wu(v8u32 _1, int var) { ++ v8u32 res = __lasx_xvmini_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lasx_xvmini_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lasx_xvmini_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_wu' must be a constant integer}} ++ return res; ++} ++ ++v4u64 xvmini_du(v4u64 _1, int var) { ++ v4u64 res = __lasx_xvmini_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lasx_xvmini_du(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lasx_xvmini_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_du' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvseqi_b(v32i8 _1, int var) { ++ v32i8 res = __lasx_xvseqi_b(_1, -17); // expected-error {{argument value -17 is outside the valid range -16, 15}} ++ res |= __lasx_xvseqi_b(_1, 16); // expected-error {{argument value 16 is outside the valid range -16, 15}} ++ res |= __lasx_xvseqi_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvseqi_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvseqi_h(v16i16 _1, int var) { ++ v16i16 res = __lasx_xvseqi_h(_1, -17); // expected-error {{argument value -17 is outside the valid range -16, 15}} ++ res |= __lasx_xvseqi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range -16, 15}} ++ res |= __lasx_xvseqi_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvseqi_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvseqi_w(v8i32 _1, int var) { ++ v8i32 res = __lasx_xvseqi_w(_1, -17); // expected-error {{argument value -17 is outside the valid range -16, 15}} ++ res |= __lasx_xvseqi_w(_1, 16); // expected-error {{argument value 16 is outside the valid range -16, 15}} ++ res |= __lasx_xvseqi_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvseqi_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvseqi_d(v4i64 _1, int var) { ++ v4i64 res = __lasx_xvseqi_d(_1, -17); // expected-error {{argument value -17 is outside the valid range -16, 15}} ++ res |= __lasx_xvseqi_d(_1, 16); // expected-error {{argument value 16 is outside the valid range -16, 15}} ++ res |= __lasx_xvseqi_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvseqi_d' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvslti_b(v32i8 _1, int var) { ++ v32i8 res = __lasx_xvslti_b(_1, -17); // expected-error {{argument value -17 is outside the valid range -16, 15}} ++ res |= __lasx_xvslti_b(_1, 16); // expected-error {{argument value 16 is outside the valid range -16, 15}} ++ res |= __lasx_xvslti_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvslti_h(v16i16 _1, int var) { ++ v16i16 res = __lasx_xvslti_h(_1, -17); // expected-error {{argument value -17 is outside the valid range -16, 15}} ++ res |= __lasx_xvslti_h(_1, 16); // expected-error {{argument value 16 is outside the valid range -16, 15}} ++ res |= __lasx_xvslti_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvslti_w(v8i32 _1, int var) { ++ v8i32 res = __lasx_xvslti_w(_1, -17); // expected-error {{argument value -17 is outside the valid range 
-16, 15}} ++ res |= __lasx_xvslti_w(_1, 16); // expected-error {{argument value 16 is outside the valid range -16, 15}} ++ res |= __lasx_xvslti_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvslti_d(v4i64 _1, int var) { ++ v4i64 res = __lasx_xvslti_d(_1, -17); // expected-error {{argument value -17 is outside the valid range -16, 15}} ++ res |= __lasx_xvslti_d(_1, 16); // expected-error {{argument value 16 is outside the valid range -16, 15}} ++ res |= __lasx_xvslti_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_d' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvslti_bu(v32u8 _1, int var) { ++ v32i8 res = __lasx_xvslti_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lasx_xvslti_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lasx_xvslti_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_bu' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvslti_hu(v16u16 _1, int var) { ++ v16i16 res = __lasx_xvslti_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lasx_xvslti_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lasx_xvslti_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_hu' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvslti_wu(v8u32 _1, int var) { ++ v8i32 res = __lasx_xvslti_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lasx_xvslti_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lasx_xvslti_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_wu' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvslti_du(v4u64 _1, int var) { ++ v4i64 res = __lasx_xvslti_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lasx_xvslti_du(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lasx_xvslti_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_du' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvslei_b(v32i8 _1, int var) { ++ v32i8 res = __lasx_xvslei_b(_1, -17); // expected-error {{argument value -17 is outside the valid range -16, 15}} ++ res |= __lasx_xvslei_b(_1, 16); // expected-error {{argument value 16 is outside the valid range -16, 15}} ++ res |= __lasx_xvslei_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvslei_h(v16i16 _1, int var) { ++ v16i16 res = __lasx_xvslei_h(_1, -17); // expected-error {{argument value -17 is outside the valid range -16, 15}} ++ res |= __lasx_xvslei_h(_1, 16); // expected-error {{argument value 16 is outside the valid range -16, 15}} ++ res |= __lasx_xvslei_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvslei_w(v8i32 _1, int var) { ++ v8i32 res = __lasx_xvslei_w(_1, -17); // expected-error {{argument value -17 is outside the valid range -16, 15}} ++ res |= __lasx_xvslei_w(_1, 16); // expected-error {{argument value 16 is outside the valid range -16, 15}} ++ res |= __lasx_xvslei_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_w' must be a constant 
integer}} ++ return res; ++} ++ ++v4i64 xvslei_d(v4i64 _1, int var) { ++ v4i64 res = __lasx_xvslei_d(_1, -17); // expected-error {{argument value -17 is outside the valid range -16, 15}} ++ res |= __lasx_xvslei_d(_1, 16); // expected-error {{argument value 16 is outside the valid range -16, 15}} ++ res |= __lasx_xvslei_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_d' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvslei_bu(v32u8 _1, int var) { ++ v32i8 res = __lasx_xvslei_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lasx_xvslei_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lasx_xvslei_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_bu' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvslei_hu(v16u16 _1, int var) { ++ v16i16 res = __lasx_xvslei_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lasx_xvslei_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lasx_xvslei_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_hu' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvslei_wu(v8u32 _1, int var) { ++ v8i32 res = __lasx_xvslei_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lasx_xvslei_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lasx_xvslei_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_wu' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvslei_du(v4u64 _1, int var) { ++ v4i64 res = __lasx_xvslei_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lasx_xvslei_du(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lasx_xvslei_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_du' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvsat_b(v32i8 _1, int var) { ++ v32i8 res = __lasx_xvsat_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 7}} ++ res |= __lasx_xvsat_b(_1, 8); // expected-error {{argument value 8 is outside the valid range 0, 7}} ++ res |= __lasx_xvsat_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvsat_h(v16i16 _1, int var) { ++ v16i16 res = __lasx_xvsat_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __lasx_xvsat_h(_1, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __lasx_xvsat_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvsat_w(v8i32 _1, int var) { ++ v8i32 res = __lasx_xvsat_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lasx_xvsat_w(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lasx_xvsat_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvsat_d(v4i64 _1, int var) { ++ v4i64 res = __lasx_xvsat_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= __lasx_xvsat_d(_1, 64); // expected-error {{argument 
value 64 is outside the valid range 0, 63}} ++ res |= __lasx_xvsat_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_d' must be a constant integer}} ++ return res; ++} ++ ++v32u8 xvsat_bu(v32u8 _1, int var) { ++ v32u8 res = __lasx_xvsat_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 7}} ++ res |= __lasx_xvsat_bu(_1, 8); // expected-error {{argument value 8 is outside the valid range 0, 7}} ++ res |= __lasx_xvsat_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_bu' must be a constant integer}} ++ return res; ++} ++ ++v16u16 xvsat_hu(v16u16 _1, int var) { ++ v16u16 res = __lasx_xvsat_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __lasx_xvsat_hu(_1, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __lasx_xvsat_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_hu' must be a constant integer}} ++ return res; ++} ++ ++v8u32 xvsat_wu(v8u32 _1, int var) { ++ v8u32 res = __lasx_xvsat_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lasx_xvsat_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lasx_xvsat_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_wu' must be a constant integer}} ++ return res; ++} ++ ++v4u64 xvsat_du(v4u64 _1, int var) { ++ v4u64 res = __lasx_xvsat_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= __lasx_xvsat_du(_1, 64); // expected-error {{argument value 64 is outside the valid range 0, 63}} ++ res |= __lasx_xvsat_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_du' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvrepl128vei_b(v32i8 _1, int var) { ++ v32i8 res = __lasx_xvrepl128vei_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __lasx_xvrepl128vei_b(_1, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __lasx_xvrepl128vei_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvrepl128vei_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvrepl128vei_h(v16i16 _1, int var) { ++ v16i16 res = __lasx_xvrepl128vei_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 7}} ++ res |= __lasx_xvrepl128vei_h(_1, 8); // expected-error {{argument value 8 is outside the valid range 0, 7}} ++ res |= __lasx_xvrepl128vei_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvrepl128vei_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvrepl128vei_w(v8i32 _1, int var) { ++ v8i32 res = __lasx_xvrepl128vei_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 3}} ++ res |= __lasx_xvrepl128vei_w(_1, 4); // expected-error {{argument value 4 is outside the valid range 0, 3}} ++ res |= __lasx_xvrepl128vei_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvrepl128vei_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvrepl128vei_d(v4i64 _1, int var) { ++ v4i64 res = __lasx_xvrepl128vei_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 1}} ++ res |= __lasx_xvrepl128vei_d(_1, 2); // expected-error {{argument value 2 is outside the valid range 0, 1}} ++ res |= __lasx_xvrepl128vei_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvrepl128vei_d' must be a 
constant integer}} ++ return res; ++} ++ ++v32u8 xvandi_b(v32u8 _1, int var) { ++ v32u8 res = __lasx_xvandi_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 255}} ++ res |= __lasx_xvandi_b(_1, 256); // expected-error {{argument value 256 is outside the valid range 0, 255}} ++ res |= __lasx_xvandi_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvandi_b' must be a constant integer}} ++ return res; ++} ++ ++v32u8 xvori_b(v32u8 _1, int var) { ++ v32u8 res = __lasx_xvori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 255}} ++ res |= __lasx_xvori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range 0, 255}} ++ res |= __lasx_xvori_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvori_b' must be a constant integer}} ++ return res; ++} ++ ++v32u8 xvnori_b(v32u8 _1, int var) { ++ v32u8 res = __lasx_xvnori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 255}} ++ res |= __lasx_xvnori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range 0, 255}} ++ res |= __lasx_xvnori_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvnori_b' must be a constant integer}} ++ return res; ++} ++ ++v32u8 xvxori_b(v32u8 _1, int var) { ++ v32u8 res = __lasx_xvxori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 255}} ++ res |= __lasx_xvxori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range 0, 255}} ++ res |= __lasx_xvxori_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvxori_b' must be a constant integer}} ++ return res; ++} ++ ++v32u8 xvbitseli_b(v32u8 _1, v32u8 _2, int var) { ++ v32u8 res = __lasx_xvbitseli_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 255}} ++ res |= __lasx_xvbitseli_b(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range 0, 255}} ++ res |= __lasx_xvbitseli_b(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvbitseli_b' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvshuf4i_b(v32i8 _1, int var) { ++ v32i8 res = __lasx_xvshuf4i_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 255}} ++ res |= __lasx_xvshuf4i_b(_1, 256); // expected-error {{argument value 256 is outside the valid range 0, 255}} ++ res |= __lasx_xvshuf4i_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvshuf4i_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvshuf4i_h(v16i16 _1, int var) { ++ v16i16 res = __lasx_xvshuf4i_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 255}} ++ res |= __lasx_xvshuf4i_h(_1, 256); // expected-error {{argument value 256 is outside the valid range 0, 255}} ++ res |= __lasx_xvshuf4i_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvshuf4i_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvshuf4i_w(v8i32 _1, int var) { ++ v8i32 res = __lasx_xvshuf4i_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 255}} ++ res |= __lasx_xvshuf4i_w(_1, 256); // expected-error {{argument value 256 is outside the valid range 0, 255}} ++ res |= __lasx_xvshuf4i_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvshuf4i_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvshuf4i_d(v4i64 _1, v4i64 _2, int var) { ++ v4i64 res = __lasx_xvshuf4i_d(_1, _2, -1); // expected-error 
{{argument value 4294967295 is outside the valid range 0, 255}} ++ res |= __lasx_xvshuf4i_d(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range 0, 255}} ++ res |= __lasx_xvshuf4i_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvshuf4i_d' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvpermi_w(v8i32 _1, v8i32 _2, int var) { ++ v8i32 res = __lasx_xvpermi_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 255}} ++ res |= __lasx_xvpermi_w(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range 0, 255}} ++ res |= __lasx_xvpermi_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvpermi_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvpermi_d(v4i64 _1, int var) { ++ v4i64 res = __lasx_xvpermi_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 255}} ++ res |= __lasx_xvpermi_d(_1, 256); // expected-error {{argument value 256 is outside the valid range 0, 255}} ++ res |= __lasx_xvpermi_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvpermi_d' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvpermi_q(v32i8 _1, v32i8 _2, int var) { ++ v32i8 res = __lasx_xvpermi_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 255}} ++ res |= __lasx_xvpermi_q(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range 0, 255}} ++ res |= __lasx_xvpermi_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvpermi_q' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvsllwil_h_b(v32i8 _1, int var) { ++ v16i16 res = __lasx_xvsllwil_h_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 7}} ++ res |= __lasx_xvsllwil_h_b(_1, 8); // expected-error {{argument value 8 is outside the valid range 0, 7}} ++ res |= __lasx_xvsllwil_h_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_h_b' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvsllwil_w_h(v16i16 _1, int var) { ++ v8i32 res = __lasx_xvsllwil_w_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __lasx_xvsllwil_w_h(_1, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __lasx_xvsllwil_w_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_w_h' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvsllwil_d_w(v8i32 _1, int var) { ++ v4i64 res = __lasx_xvsllwil_d_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lasx_xvsllwil_d_w(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lasx_xvsllwil_d_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_d_w' must be a constant integer}} ++ return res; ++} ++ ++v16u16 xvsllwil_hu_bu(v32u8 _1, int var) { ++ v16u16 res = __lasx_xvsllwil_hu_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 7}} ++ res |= __lasx_xvsllwil_hu_bu(_1, 8); // expected-error {{argument value 8 is outside the valid range 0, 7}} ++ res |= __lasx_xvsllwil_hu_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_hu_bu' must be a constant integer}} ++ return res; ++} ++ ++v8u32 xvsllwil_wu_hu(v16u16 _1, int var) { ++ v8u32 res = __lasx_xvsllwil_wu_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res 
|= __lasx_xvsllwil_wu_hu(_1, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __lasx_xvsllwil_wu_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_wu_hu' must be a constant integer}} ++ return res; ++} ++ ++v4u64 xvsllwil_du_wu(v8u32 _1, int var) { ++ v4u64 res = __lasx_xvsllwil_du_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lasx_xvsllwil_du_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lasx_xvsllwil_du_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_du_wu' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvfrstpi_b(v32i8 _1, v32i8 _2, int var) { ++ v32i8 res = __lasx_xvfrstpi_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lasx_xvfrstpi_b(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lasx_xvfrstpi_b(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvfrstpi_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvfrstpi_h(v16i16 _1, v16i16 _2, int var) { ++ v16i16 res = __lasx_xvfrstpi_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lasx_xvfrstpi_h(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lasx_xvfrstpi_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvfrstpi_h' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvbsrl_v(v32i8 _1, int var) { ++ v32i8 res = __lasx_xvbsrl_v(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lasx_xvbsrl_v(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lasx_xvbsrl_v(_1, var); // expected-error {{argument to '__builtin_lasx_xvbsrl_v' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvbsll_v(v32i8 _1, int var) { ++ v32i8 res = __lasx_xvbsll_v(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lasx_xvbsll_v(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lasx_xvbsll_v(_1, var); // expected-error {{argument to '__builtin_lasx_xvbsll_v' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvextrins_b(v32i8 _1, v32i8 _2, int var) { ++ v32i8 res = __lasx_xvextrins_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 255}} ++ res |= __lasx_xvextrins_b(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range 0, 255}} ++ res |= __lasx_xvextrins_b(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvextrins_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvextrins_h(v16i16 _1, v16i16 _2, int var) { ++ v16i16 res = __lasx_xvextrins_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 255}} ++ res |= __lasx_xvextrins_h(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range 0, 255}} ++ res |= __lasx_xvextrins_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvextrins_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvextrins_w(v8i32 _1, v8i32 _2, int var) { ++ v8i32 res = __lasx_xvextrins_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 255}} ++ res |= __lasx_xvextrins_w(_1, _2, 256); 
// expected-error {{argument value 256 is outside the valid range 0, 255}} ++ res |= __lasx_xvextrins_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvextrins_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvextrins_d(v4i64 _1, v4i64 _2, int var) { ++ v4i64 res = __lasx_xvextrins_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 255}} ++ res |= __lasx_xvextrins_d(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range 0, 255}} ++ res |= __lasx_xvextrins_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvextrins_d' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvld(void *_1, int var) { ++ v32i8 res = __lasx_xvld(_1, -2049); // expected-error {{argument value -2049 is outside the valid range -2048, 2047}} ++ res |= __lasx_xvld(_1, 2048); // expected-error {{argument value 2048 is outside the valid range -2048, 2047}} ++ res |= __lasx_xvld(_1, var); // expected-error {{argument to '__builtin_lasx_xvld' must be a constant integer}} ++ return res; ++} ++ ++void xvst(v32i8 _1, void *_2, int var) { ++ __lasx_xvst(_1, _2, -2049); // expected-error {{argument value -2049 is outside the valid range -2048, 2047}} ++ __lasx_xvst(_1, _2, 2048); // expected-error {{argument value 2048 is outside the valid range -2048, 2047}} ++ __lasx_xvst(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvst' must be a constant integer}} ++} ++ ++void xvstelm_b(v32i8 _1, void * _2, int var) { ++ __lasx_xvstelm_b(_1, _2, -129, 1); // expected-error {{argument value -129 is outside the valid range -128, 127}} ++ __lasx_xvstelm_b(_1, _2, 128, 1); // expected-error {{argument value 128 is outside the valid range -128, 127}} ++ __lasx_xvstelm_b(_1, _2, var, 1); // expected-error {{argument to '__builtin_lasx_xvstelm_b' must be a constant integer}} ++} ++ ++void xvstelm_h(v16i16 _1, void * _2, int var) { ++ __lasx_xvstelm_h(_1, _2, -258, 1); // expected-error {{argument value -258 is outside the valid range -256, 254}} ++ __lasx_xvstelm_h(_1, _2, 256, 1); // expected-error {{argument value 256 is outside the valid range -256, 254}} ++ __lasx_xvstelm_h(_1, _2, var, 1); // expected-error {{argument to '__builtin_lasx_xvstelm_h' must be a constant integer}} ++} ++ ++void xvstelm_w(v8i32 _1, void * _2, int var) { ++ __lasx_xvstelm_w(_1, _2, -516, 1); // expected-error {{argument value -516 is outside the valid range -512, 508}} ++ __lasx_xvstelm_w(_1, _2, 512, 1); // expected-error {{argument value 512 is outside the valid range -512, 508}} ++ __lasx_xvstelm_w(_1, _2, var, 1); // expected-error {{argument to '__builtin_lasx_xvstelm_w' must be a constant integer}} ++} ++ ++void xvstelm_d(v4i64 _1, void * _2, int var) { ++ __lasx_xvstelm_d(_1, _2, -1032, 1); // expected-error {{argument value -1032 is outside the valid range -1024, 1016}} ++ __lasx_xvstelm_d(_1, _2, 1024, 1); // expected-error {{argument value 1024 is outside the valid range -1024, 1016}} ++ __lasx_xvstelm_d(_1, _2, var, 1); // expected-error {{argument to '__builtin_lasx_xvstelm_d' must be a constant integer}} ++} ++ ++void xvstelm_b_idx(v32i8 _1, void * _2, int var) { ++ __lasx_xvstelm_b(_1, _2, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ __lasx_xvstelm_b(_1, _2, 1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ __lasx_xvstelm_b(_1, _2, 1, var); // expected-error {{argument to '__builtin_lasx_xvstelm_b' must be a constant integer}} ++} ++ ++void 
xvstelm_h_idx(v16i16 _1, void * _2, int var) { ++ __lasx_xvstelm_h(_1, _2, 2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ __lasx_xvstelm_h(_1, _2, 2, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ __lasx_xvstelm_h(_1, _2, 2, var); // expected-error {{argument to '__builtin_lasx_xvstelm_h' must be a constant integer}} ++} ++ ++void xvstelm_w_idx(v8i32 _1, void * _2, int var) { ++ __lasx_xvstelm_w(_1, _2, 4, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 7}} ++ __lasx_xvstelm_w(_1, _2, 4, 8); // expected-error {{argument value 8 is outside the valid range 0, 7}} ++ __lasx_xvstelm_w(_1, _2, 4, var); // expected-error {{argument to '__builtin_lasx_xvstelm_w' must be a constant integer}} ++} ++ ++void xvstelm_d_idx(v4i64 _1, void * _2, int var) { ++ __lasx_xvstelm_d(_1, _2, 8, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 3}} ++ __lasx_xvstelm_d(_1, _2, 8, 4); // expected-error {{argument value 4 is outside the valid range 0, 3}} ++ __lasx_xvstelm_d(_1, _2, 8, var); // expected-error {{argument to '__builtin_lasx_xvstelm_d' must be a constant integer}} ++} ++ ++v8i32 xvinsve0_w(v8i32 _1, v8i32 _2, int var) { ++ v8i32 res = __lasx_xvinsve0_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 7}} ++ res |= __lasx_xvinsve0_w(_1, _2, 8); // expected-error {{argument value 8 is outside the valid range 0, 7}} ++ res |= __lasx_xvinsve0_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvinsve0_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvinsve0_d(v4i64 _1, v4i64 _2, int var) { ++ v4i64 res = __lasx_xvinsve0_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 3}} ++ res |= __lasx_xvinsve0_d(_1, _2, 4); // expected-error {{argument value 4 is outside the valid range 0, 3}} ++ res |= __lasx_xvinsve0_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvinsve0_d' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvpickve_w(v8i32 _1, int var) { ++ v8i32 res = __lasx_xvpickve_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 7}} ++ res |= __lasx_xvpickve_w(_1, 8); // expected-error {{argument value 8 is outside the valid range 0, 7}} ++ res |= __lasx_xvpickve_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvpickve_d(v4i64 _1, int var) { ++ v4i64 res = __lasx_xvpickve_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 3}} ++ res |= __lasx_xvpickve_d(_1, 4); // expected-error {{argument value 4 is outside the valid range 0, 3}} ++ res |= __lasx_xvpickve_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve_d' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvldi(int var) { ++ v4i64 res = __lasx_xvldi(-4097); // expected-error {{argument value -4097 is outside the valid range -4096, 4095}} ++ res |= __lasx_xvldi(4096); // expected-error {{argument value 4096 is outside the valid range -4096, 4095}} ++ res |= __lasx_xvldi(var); // expected-error {{argument to '__builtin_lasx_xvldi' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvinsgr2vr_w(v8i32 _1, int var) { ++ v8i32 res = __lasx_xvinsgr2vr_w(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 7}} ++ res |= __lasx_xvinsgr2vr_w(_1, 1, 8); // expected-error 
{{argument value 8 is outside the valid range 0, 7}} ++ res |= __lasx_xvinsgr2vr_w(_1, 1, var); // expected-error {{argument to '__builtin_lasx_xvinsgr2vr_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvinsgr2vr_d(v4i64 _1, int var) { ++ v4i64 res = __lasx_xvinsgr2vr_d(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 3}} ++ res |= __lasx_xvinsgr2vr_d(_1, 1, 4); // expected-error {{argument value 4 is outside the valid range 0, 3}} ++ res |= __lasx_xvinsgr2vr_d(_1, 1, var); // expected-error {{argument to '__builtin_lasx_xvinsgr2vr_d' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvldrepl_b(void *_1, int var) { ++ v32i8 res = __lasx_xvldrepl_b(_1, -2049); // expected-error {{argument value -2049 is outside the valid range -2048, 2047}} ++ res |= __lasx_xvldrepl_b(_1, 2048); // expected-error {{argument value 2048 is outside the valid range -2048, 2047}} ++ res |= __lasx_xvldrepl_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvldrepl_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvldrepl_h(void *_1, int var) { ++ v16i16 res = __lasx_xvldrepl_h(_1, -2050); // expected-error {{argument value -2050 is outside the valid range -2048, 2046}} ++ res |= __lasx_xvldrepl_h(_1, 2048); // expected-error {{argument value 2048 is outside the valid range -2048, 2046}} ++ res |= __lasx_xvldrepl_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvldrepl_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvldrepl_w(void *_1, int var) { ++ v8i32 res = __lasx_xvldrepl_w(_1, -2052); // expected-error {{argument value -2052 is outside the valid range -2048, 2044}} ++ res |= __lasx_xvldrepl_w(_1, 2048); // expected-error {{argument value 2048 is outside the valid range -2048, 2044}} ++ res |= __lasx_xvldrepl_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvldrepl_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvldrepl_d(void *_1, int var) { ++ v4i64 res = __lasx_xvldrepl_d(_1, -2056); // expected-error {{argument value -2056 is outside the valid range -2048, 2040}} ++ res |= __lasx_xvldrepl_d(_1, 2048); // expected-error {{argument value 2048 is outside the valid range -2048, 2040}} ++ res |= __lasx_xvldrepl_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvldrepl_d' must be a constant integer}} ++ return res; ++} ++ ++int xvpickve2gr_w(v8i32 _1, int var) { ++ int res = __lasx_xvpickve2gr_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 7}} ++ res |= __lasx_xvpickve2gr_w(_1, 8); // expected-error {{argument value 8 is outside the valid range 0, 7}} ++ res |= __lasx_xvpickve2gr_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve2gr_w' must be a constant integer}} ++ return res; ++} ++ ++unsigned int xvpickve2gr_wu(v8i32 _1, int var) { ++ unsigned int res = __lasx_xvpickve2gr_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 7}} ++ res |= __lasx_xvpickve2gr_wu(_1, 8); // expected-error {{argument value 8 is outside the valid range 0, 7}} ++ res |= __lasx_xvpickve2gr_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve2gr_wu' must be a constant integer}} ++ return res; ++} ++ ++long xvpickve2gr_d(v4i64 _1, int var) { ++ long res = __lasx_xvpickve2gr_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 3}} ++ res |= __lasx_xvpickve2gr_d(_1, 4); // expected-error {{argument value 4 is outside the valid range 0, 3}} ++ 
res |= __lasx_xvpickve2gr_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve2gr_d' must be a constant integer}} ++ return res; ++} ++ ++unsigned long int xvpickve2gr_du(v4i64 _1, int var) { ++ unsigned long int res = __lasx_xvpickve2gr_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 3}} ++ res |= __lasx_xvpickve2gr_du(_1, 4); // expected-error {{argument value 4 is outside the valid range 0, 3}} ++ res |= __lasx_xvpickve2gr_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve2gr_du' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvrotri_b(v32i8 _1, int var) { ++ v32i8 res = __lasx_xvrotri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 7}} ++ res |= __lasx_xvrotri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range 0, 7}} ++ res |= __lasx_xvrotri_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvrotri_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvrotri_h(v16i16 _1, int var) { ++ v16i16 res = __lasx_xvrotri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __lasx_xvrotri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __lasx_xvrotri_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvrotri_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvrotri_w(v8i32 _1, int var) { ++ v8i32 res = __lasx_xvrotri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lasx_xvrotri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lasx_xvrotri_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvrotri_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvrotri_d(v4i64 _1, int var) { ++ v4i64 res = __lasx_xvrotri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= __lasx_xvrotri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range 0, 63}} ++ res |= __lasx_xvrotri_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvrotri_d' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvsrlni_b_h(v32i8 _1, v32i8 _2, int var) { ++ v32i8 res = __lasx_xvsrlni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __lasx_xvsrlni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __lasx_xvsrlni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlni_b_h' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvsrlni_h_w(v16i16 _1, v16i16 _2, int var) { ++ v16i16 res = __lasx_xvsrlni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lasx_xvsrlni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lasx_xvsrlni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlni_h_w' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvsrlni_w_d(v8i32 _1, v8i32 _2, int var) { ++ v8i32 res = __lasx_xvsrlni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= __lasx_xvsrlni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range 0, 63}} ++ res |= __lasx_xvsrlni_w_d(_1, _2, var); // expected-error {{argument to 
'__builtin_lasx_xvsrlni_w_d' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvsrlni_d_q(v4i64 _1, v4i64 _2, int var) { ++ v4i64 res = __lasx_xvsrlni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 127}} ++ res |= __lasx_xvsrlni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range 0, 127}} ++ res |= __lasx_xvsrlni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlni_d_q' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvsrlrni_b_h(v32i8 _1, v32i8 _2, int var) { ++ v32i8 res = __lasx_xvsrlrni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __lasx_xvsrlrni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __lasx_xvsrlrni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlrni_b_h' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvsrlrni_h_w(v16i16 _1, v16i16 _2, int var) { ++ v16i16 res = __lasx_xvsrlrni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lasx_xvsrlrni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lasx_xvsrlrni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlrni_h_w' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvsrlrni_w_d(v8i32 _1, v8i32 _2, int var) { ++ v8i32 res = __lasx_xvsrlrni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= __lasx_xvsrlrni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range 0, 63}} ++ res |= __lasx_xvsrlrni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlrni_w_d' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvsrlrni_d_q(v4i64 _1, v4i64 _2, int var) { ++ v4i64 res = __lasx_xvsrlrni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 127}} ++ res |= __lasx_xvsrlrni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range 0, 127}} ++ res |= __lasx_xvsrlrni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlrni_d_q' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvssrlni_b_h(v32i8 _1, v32i8 _2, int var) { ++ v32i8 res = __lasx_xvssrlni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __lasx_xvssrlni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __lasx_xvssrlni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_b_h' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvssrlni_h_w(v16i16 _1, v16i16 _2, int var) { ++ v16i16 res = __lasx_xvssrlni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lasx_xvssrlni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lasx_xvssrlni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_h_w' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvssrlni_w_d(v8i32 _1, v8i32 _2, int var) { ++ v8i32 res = __lasx_xvssrlni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= __lasx_xvssrlni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid 
range 0, 63}} ++ res |= __lasx_xvssrlni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_w_d' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvssrlni_d_q(v4i64 _1, v4i64 _2, int var) { ++ v4i64 res = __lasx_xvssrlni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 127}} ++ res |= __lasx_xvssrlni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range 0, 127}} ++ res |= __lasx_xvssrlni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_d_q' must be a constant integer}} ++ return res; ++} ++ ++v32u8 xvssrlni_bu_h(v32u8 _1, v32i8 _2, int var) { ++ v32u8 res = __lasx_xvssrlni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __lasx_xvssrlni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __lasx_xvssrlni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_bu_h' must be a constant integer}} ++ return res; ++} ++ ++v16u16 xvssrlni_hu_w(v16u16 _1, v16i16 _2, int var) { ++ v16u16 res = __lasx_xvssrlni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lasx_xvssrlni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lasx_xvssrlni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_hu_w' must be a constant integer}} ++ return res; ++} ++ ++v8u32 xvssrlni_wu_d(v8u32 _1, v8i32 _2, int var) { ++ v8u32 res = __lasx_xvssrlni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= __lasx_xvssrlni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range 0, 63}} ++ res |= __lasx_xvssrlni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_wu_d' must be a constant integer}} ++ return res; ++} ++ ++v4u64 xvssrlni_du_q(v4u64 _1, v4i64 _2, int var) { ++ v4u64 res = __lasx_xvssrlni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 127}} ++ res |= __lasx_xvssrlni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range 0, 127}} ++ res |= __lasx_xvssrlni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_du_q' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvssrlrni_b_h(v32i8 _1, v32i8 _2, int var) { ++ v32i8 res = __lasx_xvssrlrni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __lasx_xvssrlrni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __lasx_xvssrlrni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_b_h' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvssrlrni_h_w(v16i16 _1, v16i16 _2, int var) { ++ v16i16 res = __lasx_xvssrlrni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lasx_xvssrlrni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lasx_xvssrlrni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_h_w' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvssrlrni_w_d(v8i32 _1, v8i32 _2, int var) { ++ v8i32 res = __lasx_xvssrlrni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside 
the valid range 0, 63}} ++ res |= __lasx_xvssrlrni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range 0, 63}} ++ res |= __lasx_xvssrlrni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_w_d' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvssrlrni_d_q(v4i64 _1, v4i64 _2, int var) { ++ v4i64 res = __lasx_xvssrlrni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 127}} ++ res |= __lasx_xvssrlrni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range 0, 127}} ++ res |= __lasx_xvssrlrni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_d_q' must be a constant integer}} ++ return res; ++} ++ ++v32u8 xvssrlrni_bu_h(v32u8 _1, v32i8 _2, int var) { ++ v32u8 res = __lasx_xvssrlrni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __lasx_xvssrlrni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __lasx_xvssrlrni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_bu_h' must be a constant integer}} ++ return res; ++} ++ ++v16u16 xvssrlrni_hu_w(v16u16 _1, v16i16 _2, int var) { ++ v16u16 res = __lasx_xvssrlrni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lasx_xvssrlrni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lasx_xvssrlrni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_hu_w' must be a constant integer}} ++ return res; ++} ++ ++v8u32 xvssrlrni_wu_d(v8u32 _1, v8i32 _2, int var) { ++ v8u32 res = __lasx_xvssrlrni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= __lasx_xvssrlrni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range 0, 63}} ++ res |= __lasx_xvssrlrni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_wu_d' must be a constant integer}} ++ return res; ++} ++ ++v4u64 xvssrlrni_du_q(v4u64 _1, v4i64 _2, int var) { ++ v4u64 res = __lasx_xvssrlrni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 127}} ++ res |= __lasx_xvssrlrni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range 0, 127}} ++ res |= __lasx_xvssrlrni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_du_q' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvsrani_b_h(v32i8 _1, v32i8 _2, int var) { ++ v32i8 res = __lasx_xvsrani_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __lasx_xvsrani_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __lasx_xvsrani_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrani_b_h' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvsrani_h_w(v16i16 _1, v16i16 _2, int var) { ++ v16i16 res = __lasx_xvsrani_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lasx_xvsrani_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lasx_xvsrani_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrani_h_w' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvsrani_w_d(v8i32 
_1, v8i32 _2, int var) { ++ v8i32 res = __lasx_xvsrani_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= __lasx_xvsrani_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range 0, 63}} ++ res |= __lasx_xvsrani_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrani_w_d' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvsrani_d_q(v4i64 _1, v4i64 _2, int var) { ++ v4i64 res = __lasx_xvsrani_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 127}} ++ res |= __lasx_xvsrani_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range 0, 127}} ++ res |= __lasx_xvsrani_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrani_d_q' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvsrarni_b_h(v32i8 _1, v32i8 _2, int var) { ++ v32i8 res = __lasx_xvsrarni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __lasx_xvsrarni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __lasx_xvsrarni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrarni_b_h' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvsrarni_h_w(v16i16 _1, v16i16 _2, int var) { ++ v16i16 res = __lasx_xvsrarni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lasx_xvsrarni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lasx_xvsrarni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrarni_h_w' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvsrarni_w_d(v8i32 _1, v8i32 _2, int var) { ++ v8i32 res = __lasx_xvsrarni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= __lasx_xvsrarni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range 0, 63}} ++ res |= __lasx_xvsrarni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrarni_w_d' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvsrarni_d_q(v4i64 _1, v4i64 _2, int var) { ++ v4i64 res = __lasx_xvsrarni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 127}} ++ res |= __lasx_xvsrarni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range 0, 127}} ++ res |= __lasx_xvsrarni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrarni_d_q' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvssrani_b_h(v32i8 _1, v32i8 _2, int var) { ++ v32i8 res = __lasx_xvssrani_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __lasx_xvssrani_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __lasx_xvssrani_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_b_h' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvssrani_h_w(v16i16 _1, v16i16 _2, int var) { ++ v16i16 res = __lasx_xvssrani_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lasx_xvssrani_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lasx_xvssrani_h_w(_1, _2, var); // expected-error {{argument to 
'__builtin_lasx_xvssrani_h_w' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvssrani_w_d(v8i32 _1, v8i32 _2, int var) { ++ v8i32 res = __lasx_xvssrani_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= __lasx_xvssrani_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range 0, 63}} ++ res |= __lasx_xvssrani_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_w_d' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvssrani_d_q(v4i64 _1, v4i64 _2, int var) { ++ v4i64 res = __lasx_xvssrani_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 127}} ++ res |= __lasx_xvssrani_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range 0, 127}} ++ res |= __lasx_xvssrani_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_d_q' must be a constant integer}} ++ return res; ++} ++ ++v32u8 xvssrani_bu_h(v32u8 _1, v32i8 _2, int var) { ++ v32u8 res = __lasx_xvssrani_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __lasx_xvssrani_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __lasx_xvssrani_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_bu_h' must be a constant integer}} ++ return res; ++} ++ ++v16u16 xvssrani_hu_w(v16u16 _1, v16i16 _2, int var) { ++ v16u16 res = __lasx_xvssrani_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lasx_xvssrani_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lasx_xvssrani_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_hu_w' must be a constant integer}} ++ return res; ++} ++ ++v8u32 xvssrani_wu_d(v8u32 _1, v8i32 _2, int var) { ++ v8u32 res = __lasx_xvssrani_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= __lasx_xvssrani_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range 0, 63}} ++ res |= __lasx_xvssrani_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_wu_d' must be a constant integer}} ++ return res; ++} ++ ++v4u64 xvssrani_du_q(v4u64 _1, v4i64 _2, int var) { ++ v4u64 res = __lasx_xvssrani_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 127}} ++ res |= __lasx_xvssrani_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range 0, 127}} ++ res |= __lasx_xvssrani_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_du_q' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvssrarni_b_h(v32i8 _1, v32i8 _2, int var) { ++ v32i8 res = __lasx_xvssrarni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __lasx_xvssrarni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __lasx_xvssrarni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_b_h' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvssrarni_h_w(v16i16 _1, v16i16 _2, int var) { ++ v16i16 res = __lasx_xvssrarni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lasx_xvssrarni_h_w(_1, _2, 32); // expected-error 
{{argument value 32 is outside the valid range 0, 31}} ++ res |= __lasx_xvssrarni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_h_w' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvssrarni_w_d(v8i32 _1, v8i32 _2, int var) { ++ v8i32 res = __lasx_xvssrarni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= __lasx_xvssrarni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range 0, 63}} ++ res |= __lasx_xvssrarni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_w_d' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvssrarni_d_q(v4i64 _1, v4i64 _2, int var) { ++ v4i64 res = __lasx_xvssrarni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 127}} ++ res |= __lasx_xvssrarni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range 0, 127}} ++ res |= __lasx_xvssrarni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_d_q' must be a constant integer}} ++ return res; ++} ++ ++v32u8 xvssrarni_bu_h(v32u8 _1, v32i8 _2, int var) { ++ v32u8 res = __lasx_xvssrarni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __lasx_xvssrarni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __lasx_xvssrarni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_bu_h' must be a constant integer}} ++ return res; ++} ++ ++v16u16 xvssrarni_hu_w(v16u16 _1, v16i16 _2, int var) { ++ v16u16 res = __lasx_xvssrarni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __lasx_xvssrarni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __lasx_xvssrarni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_hu_w' must be a constant integer}} ++ return res; ++} ++ ++v8u32 xvssrarni_wu_d(v8u32 _1, v8i32 _2, int var) { ++ v8u32 res = __lasx_xvssrarni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= __lasx_xvssrarni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range 0, 63}} ++ res |= __lasx_xvssrarni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_wu_d' must be a constant integer}} ++ return res; ++} ++ ++v4u64 xvssrarni_du_q(v4u64 _1, v4i64 _2, int var) { ++ v4u64 res = __lasx_xvssrarni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 127}} ++ res |= __lasx_xvssrarni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range 0, 127}} ++ res |= __lasx_xvssrarni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_du_q' must be a constant integer}} ++ return res; ++} ++ ++v4f64 xvpickve_d_f(v4f64 _1, int var) { ++ v4f64 res = __lasx_xvpickve_d_f(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 3}} ++ res += __lasx_xvpickve_d_f(_1, 4); // expected-error {{argument value 4 is outside the valid range 0, 3}} ++ res += __lasx_xvpickve_d_f(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve_d_f' must be a constant integer}} ++ return res; ++} ++ ++v8f32 xvpickve_w_f(v8f32 _1, int var) { ++ v8f32 res = __lasx_xvpickve_w_f(_1, -1); // expected-error {{argument value 
4294967295 is outside the valid range 0, 7}} ++ res += __lasx_xvpickve_w_f(_1, 8); // expected-error {{argument value 8 is outside the valid range 0, 7}} ++ res += __lasx_xvpickve_w_f(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve_w_f' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvrepli_b(int var) { ++ v32i8 res = __lasx_xvrepli_b(-513); // expected-error {{argument value -513 is outside the valid range -512, 511}} ++ res |= __lasx_xvrepli_b(512); // expected-error {{argument value 512 is outside the valid range -512, 511}} ++ res |= __lasx_xvrepli_b(var); // expected-error {{argument to '__builtin_lasx_xvrepli_b' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvrepli_d(int var) { ++ v4i64 res = __lasx_xvrepli_d(-513); // expected-error {{argument value -513 is outside the valid range -512, 511}} ++ res |= __lasx_xvrepli_d(512); // expected-error {{argument value 512 is outside the valid range -512, 511}} ++ res |= __lasx_xvrepli_d(var); // expected-error {{argument to '__builtin_lasx_xvrepli_d' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvrepli_h(int var) { ++ v16i16 res = __lasx_xvrepli_h(-513); // expected-error {{argument value -513 is outside the valid range -512, 511}} ++ res |= __lasx_xvrepli_h(512); // expected-error {{argument value 512 is outside the valid range -512, 511}} ++ res |= __lasx_xvrepli_h(var); // expected-error {{argument to '__builtin_lasx_xvrepli_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvrepli_w(int var) { ++ v8i32 res = __lasx_xvrepli_w(-513); // expected-error {{argument value -513 is outside the valid range -512, 511}} ++ res |= __lasx_xvrepli_w(512); // expected-error {{argument value 512 is outside the valid range -512, 511}} ++ res |= __lasx_xvrepli_w(var); // expected-error {{argument to '__builtin_lasx_xvrepli_w' must be a constant integer}} ++ return res; ++} +diff --git a/clang/test/CodeGen/LoongArch/lasx/builtin-alias.c b/clang/test/CodeGen/LoongArch/lasx/builtin-alias.c +new file mode 100644 +index 000000000000..09b2d5fcacf5 +--- /dev/null ++++ b/clang/test/CodeGen/LoongArch/lasx/builtin-alias.c +@@ -0,0 +1,4430 @@ ++// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py ++// RUN: %clang_cc1 -triple loongarch64 -target-feature +lasx -O2 -emit-llvm %s -o - | FileCheck %s ++ ++#include <lasxintrin.h> ++ ++// CHECK-LABEL: @xvsll_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsll.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvsll_b(v32i8 _1, v32i8 _2) { return __lasx_xvsll_b(_1, _2); } ++// CHECK-LABEL: @xvsll_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsll.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvsll_h(v16i16 _1, v16i16 _2) { return __lasx_xvsll_h(_1, _2); } ++// CHECK-LABEL: @xvsll_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsll.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvsll_w(v8i32 _1, v8i32 _2) { return __lasx_xvsll_w(_1, _2); } ++// CHECK-LABEL: @xvsll_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsll.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvsll_d(v4i64 _1, v4i64 _2) { return __lasx_xvsll_d(_1, _2); } ++// CHECK-LABEL: @xvslli_b( ++// CHECK-NEXT: entry: 
++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvslli.b(<32 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvslli_b(v32i8 _1) { return __lasx_xvslli_b(_1, 1); } ++// CHECK-LABEL: @xvslli_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvslli.h(<16 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvslli_h(v16i16 _1) { return __lasx_xvslli_h(_1, 1); } ++// CHECK-LABEL: @xvslli_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvslli.w(<8 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvslli_w(v8i32 _1) { return __lasx_xvslli_w(_1, 1); } ++// CHECK-LABEL: @xvslli_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvslli.d(<4 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvslli_d(v4i64 _1) { return __lasx_xvslli_d(_1, 1); } ++// CHECK-LABEL: @xvsra_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsra.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvsra_b(v32i8 _1, v32i8 _2) { return __lasx_xvsra_b(_1, _2); } ++// CHECK-LABEL: @xvsra_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsra.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvsra_h(v16i16 _1, v16i16 _2) { return __lasx_xvsra_h(_1, _2); } ++// CHECK-LABEL: @xvsra_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsra.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvsra_w(v8i32 _1, v8i32 _2) { return __lasx_xvsra_w(_1, _2); } ++// CHECK-LABEL: @xvsra_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsra.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvsra_d(v4i64 _1, v4i64 _2) { return __lasx_xvsra_d(_1, _2); } ++// CHECK-LABEL: @xvsrai_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsrai.b(<32 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvsrai_b(v32i8 _1) { return __lasx_xvsrai_b(_1, 1); } ++// CHECK-LABEL: @xvsrai_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsrai.h(<16 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvsrai_h(v16i16 _1) { return __lasx_xvsrai_h(_1, 1); } ++// CHECK-LABEL: @xvsrai_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsrai.w(<8 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvsrai_w(v8i32 _1) { return __lasx_xvsrai_w(_1, 1); } ++// CHECK-LABEL: @xvsrai_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsrai.d(<4 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvsrai_d(v4i64 _1) { return __lasx_xvsrai_d(_1, 1); } ++// CHECK-LABEL: @xvsrar_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsrar.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvsrar_b(v32i8 _1, v32i8 _2) { return __lasx_xvsrar_b(_1, _2); } ++// CHECK-LABEL: @xvsrar_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> 
@llvm.loongarch.lasx.xvsrar.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvsrar_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrar_h(_1, _2); } ++// CHECK-LABEL: @xvsrar_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsrar.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvsrar_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrar_w(_1, _2); } ++// CHECK-LABEL: @xvsrar_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsrar.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvsrar_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrar_d(_1, _2); } ++// CHECK-LABEL: @xvsrari_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsrari.b(<32 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvsrari_b(v32i8 _1) { return __lasx_xvsrari_b(_1, 1); } ++// CHECK-LABEL: @xvsrari_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsrari.h(<16 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvsrari_h(v16i16 _1) { return __lasx_xvsrari_h(_1, 1); } ++// CHECK-LABEL: @xvsrari_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsrari.w(<8 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvsrari_w(v8i32 _1) { return __lasx_xvsrari_w(_1, 1); } ++// CHECK-LABEL: @xvsrari_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsrari.d(<4 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvsrari_d(v4i64 _1) { return __lasx_xvsrari_d(_1, 1); } ++// CHECK-LABEL: @xvsrl_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsrl.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvsrl_b(v32i8 _1, v32i8 _2) { return __lasx_xvsrl_b(_1, _2); } ++// CHECK-LABEL: @xvsrl_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsrl.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvsrl_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrl_h(_1, _2); } ++// CHECK-LABEL: @xvsrl_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsrl.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvsrl_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrl_w(_1, _2); } ++// CHECK-LABEL: @xvsrl_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsrl.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvsrl_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrl_d(_1, _2); } ++// CHECK-LABEL: @xvsrli_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsrli.b(<32 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvsrli_b(v32i8 _1) { return __lasx_xvsrli_b(_1, 1); } ++// CHECK-LABEL: @xvsrli_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsrli.h(<16 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvsrli_h(v16i16 _1) { return __lasx_xvsrli_h(_1, 1); } ++// CHECK-LABEL: @xvsrli_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> 
@llvm.loongarch.lasx.xvsrli.w(<8 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvsrli_w(v8i32 _1) { return __lasx_xvsrli_w(_1, 1); } ++// CHECK-LABEL: @xvsrli_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsrli.d(<4 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvsrli_d(v4i64 _1) { return __lasx_xvsrli_d(_1, 1); } ++// CHECK-LABEL: @xvsrlr_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlr.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvsrlr_b(v32i8 _1, v32i8 _2) { return __lasx_xvsrlr_b(_1, _2); } ++// CHECK-LABEL: @xvsrlr_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlr.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvsrlr_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrlr_h(_1, _2); } ++// CHECK-LABEL: @xvsrlr_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlr.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvsrlr_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrlr_w(_1, _2); } ++// CHECK-LABEL: @xvsrlr_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlr.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvsrlr_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrlr_d(_1, _2); } ++// CHECK-LABEL: @xvsrlri_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlri.b(<32 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvsrlri_b(v32i8 _1) { return __lasx_xvsrlri_b(_1, 1); } ++// CHECK-LABEL: @xvsrlri_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlri.h(<16 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvsrlri_h(v16i16 _1) { return __lasx_xvsrlri_h(_1, 1); } ++// CHECK-LABEL: @xvsrlri_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlri.w(<8 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvsrlri_w(v8i32 _1) { return __lasx_xvsrlri_w(_1, 1); } ++// CHECK-LABEL: @xvsrlri_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlri.d(<4 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvsrlri_d(v4i64 _1) { return __lasx_xvsrlri_d(_1, 1); } ++// CHECK-LABEL: @xvbitclr_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvbitclr.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32u8 xvbitclr_b(v32u8 _1, v32u8 _2) { return __lasx_xvbitclr_b(_1, _2); } ++// CHECK-LABEL: @xvbitclr_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvbitclr.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16u16 xvbitclr_h(v16u16 _1, v16u16 _2) { return __lasx_xvbitclr_h(_1, _2); } ++// CHECK-LABEL: @xvbitclr_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvbitclr.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8u32 xvbitclr_w(v8u32 _1, v8u32 _2) { return __lasx_xvbitclr_w(_1, _2); } ++// CHECK-LABEL: @xvbitclr_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: 
TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvbitclr.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4u64 xvbitclr_d(v4u64 _1, v4u64 _2) { return __lasx_xvbitclr_d(_1, _2); } ++// CHECK-LABEL: @xvbitclri_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvbitclri.b(<32 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32u8 xvbitclri_b(v32u8 _1) { return __lasx_xvbitclri_b(_1, 1); } ++// CHECK-LABEL: @xvbitclri_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvbitclri.h(<16 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16u16 xvbitclri_h(v16u16 _1) { return __lasx_xvbitclri_h(_1, 1); } ++// CHECK-LABEL: @xvbitclri_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvbitclri.w(<8 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8u32 xvbitclri_w(v8u32 _1) { return __lasx_xvbitclri_w(_1, 1); } ++// CHECK-LABEL: @xvbitclri_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvbitclri.d(<4 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4u64 xvbitclri_d(v4u64 _1) { return __lasx_xvbitclri_d(_1, 1); } ++// CHECK-LABEL: @xvbitset_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvbitset.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32u8 xvbitset_b(v32u8 _1, v32u8 _2) { return __lasx_xvbitset_b(_1, _2); } ++// CHECK-LABEL: @xvbitset_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvbitset.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16u16 xvbitset_h(v16u16 _1, v16u16 _2) { return __lasx_xvbitset_h(_1, _2); } ++// CHECK-LABEL: @xvbitset_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvbitset.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8u32 xvbitset_w(v8u32 _1, v8u32 _2) { return __lasx_xvbitset_w(_1, _2); } ++// CHECK-LABEL: @xvbitset_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvbitset.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4u64 xvbitset_d(v4u64 _1, v4u64 _2) { return __lasx_xvbitset_d(_1, _2); } ++// CHECK-LABEL: @xvbitseti_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvbitseti.b(<32 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32u8 xvbitseti_b(v32u8 _1) { return __lasx_xvbitseti_b(_1, 1); } ++// CHECK-LABEL: @xvbitseti_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvbitseti.h(<16 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16u16 xvbitseti_h(v16u16 _1) { return __lasx_xvbitseti_h(_1, 1); } ++// CHECK-LABEL: @xvbitseti_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvbitseti.w(<8 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8u32 xvbitseti_w(v8u32 _1) { return __lasx_xvbitseti_w(_1, 1); } ++// CHECK-LABEL: @xvbitseti_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvbitseti.d(<4 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4u64 xvbitseti_d(v4u64 _1) { return __lasx_xvbitseti_d(_1, 
1); } ++// CHECK-LABEL: @xvbitrev_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvbitrev.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32u8 xvbitrev_b(v32u8 _1, v32u8 _2) { return __lasx_xvbitrev_b(_1, _2); } ++// CHECK-LABEL: @xvbitrev_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvbitrev.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16u16 xvbitrev_h(v16u16 _1, v16u16 _2) { return __lasx_xvbitrev_h(_1, _2); } ++// CHECK-LABEL: @xvbitrev_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvbitrev.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8u32 xvbitrev_w(v8u32 _1, v8u32 _2) { return __lasx_xvbitrev_w(_1, _2); } ++// CHECK-LABEL: @xvbitrev_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvbitrev.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4u64 xvbitrev_d(v4u64 _1, v4u64 _2) { return __lasx_xvbitrev_d(_1, _2); } ++// CHECK-LABEL: @xvbitrevi_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b(<32 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32u8 xvbitrevi_b(v32u8 _1) { return __lasx_xvbitrevi_b(_1, 1); } ++// CHECK-LABEL: @xvbitrevi_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h(<16 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16u16 xvbitrevi_h(v16u16 _1) { return __lasx_xvbitrevi_h(_1, 1); } ++// CHECK-LABEL: @xvbitrevi_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w(<8 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8u32 xvbitrevi_w(v8u32 _1) { return __lasx_xvbitrevi_w(_1, 1); } ++// CHECK-LABEL: @xvbitrevi_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d(<4 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4u64 xvbitrevi_d(v4u64 _1) { return __lasx_xvbitrevi_d(_1, 1); } ++// CHECK-LABEL: @xvadd_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvadd.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvadd_b(v32i8 _1, v32i8 _2) { return __lasx_xvadd_b(_1, _2); } ++// CHECK-LABEL: @xvadd_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvadd.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvadd_h(v16i16 _1, v16i16 _2) { return __lasx_xvadd_h(_1, _2); } ++// CHECK-LABEL: @xvadd_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvadd.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvadd_w(v8i32 _1, v8i32 _2) { return __lasx_xvadd_w(_1, _2); } ++// CHECK-LABEL: @xvadd_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvadd.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvadd_d(v4i64 _1, v4i64 _2) { return __lasx_xvadd_d(_1, _2); } ++// CHECK-LABEL: @xvaddi_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvaddi.bu(<32 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret 
<32 x i8> TMP0 ++// ++v32i8 xvaddi_bu(v32i8 _1) { return __lasx_xvaddi_bu(_1, 1); } ++// CHECK-LABEL: @xvaddi_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvaddi.hu(<16 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvaddi_hu(v16i16 _1) { return __lasx_xvaddi_hu(_1, 1); } ++// CHECK-LABEL: @xvaddi_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvaddi.wu(<8 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvaddi_wu(v8i32 _1) { return __lasx_xvaddi_wu(_1, 1); } ++// CHECK-LABEL: @xvaddi_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvaddi.du(<4 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvaddi_du(v4i64 _1) { return __lasx_xvaddi_du(_1, 1); } ++// CHECK-LABEL: @xvsub_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsub.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvsub_b(v32i8 _1, v32i8 _2) { return __lasx_xvsub_b(_1, _2); } ++// CHECK-LABEL: @xvsub_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsub.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvsub_h(v16i16 _1, v16i16 _2) { return __lasx_xvsub_h(_1, _2); } ++// CHECK-LABEL: @xvsub_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsub.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvsub_w(v8i32 _1, v8i32 _2) { return __lasx_xvsub_w(_1, _2); } ++// CHECK-LABEL: @xvsub_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsub.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvsub_d(v4i64 _1, v4i64 _2) { return __lasx_xvsub_d(_1, _2); } ++// CHECK-LABEL: @xvsubi_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsubi.bu(<32 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvsubi_bu(v32i8 _1) { return __lasx_xvsubi_bu(_1, 1); } ++// CHECK-LABEL: @xvsubi_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsubi.hu(<16 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvsubi_hu(v16i16 _1) { return __lasx_xvsubi_hu(_1, 1); } ++// CHECK-LABEL: @xvsubi_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsubi.wu(<8 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvsubi_wu(v8i32 _1) { return __lasx_xvsubi_wu(_1, 1); } ++// CHECK-LABEL: @xvsubi_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsubi.du(<4 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvsubi_du(v4i64 _1) { return __lasx_xvsubi_du(_1, 1); } ++// CHECK-LABEL: @xvmax_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmax.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvmax_b(v32i8 _1, v32i8 _2) { return __lasx_xvmax_b(_1, _2); } ++// CHECK-LABEL: @xvmax_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmax.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 
xvmax_h(v16i16 _1, v16i16 _2) { return __lasx_xvmax_h(_1, _2); } ++// CHECK-LABEL: @xvmax_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmax.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvmax_w(v8i32 _1, v8i32 _2) { return __lasx_xvmax_w(_1, _2); } ++// CHECK-LABEL: @xvmax_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmax.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvmax_d(v4i64 _1, v4i64 _2) { return __lasx_xvmax_d(_1, _2); } ++// CHECK-LABEL: @xvmaxi_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmaxi.b(<32 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvmaxi_b(v32i8 _1) { return __lasx_xvmaxi_b(_1, 1); } ++// CHECK-LABEL: @xvmaxi_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmaxi.h(<16 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvmaxi_h(v16i16 _1) { return __lasx_xvmaxi_h(_1, 1); } ++// CHECK-LABEL: @xvmaxi_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmaxi.w(<8 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvmaxi_w(v8i32 _1) { return __lasx_xvmaxi_w(_1, 1); } ++// CHECK-LABEL: @xvmaxi_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmaxi.d(<4 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvmaxi_d(v4i64 _1) { return __lasx_xvmaxi_d(_1, 1); } ++// CHECK-LABEL: @xvmax_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmax.bu(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32u8 xvmax_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmax_bu(_1, _2); } ++// CHECK-LABEL: @xvmax_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmax.hu(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16u16 xvmax_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmax_hu(_1, _2); } ++// CHECK-LABEL: @xvmax_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmax.wu(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8u32 xvmax_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmax_wu(_1, _2); } ++// CHECK-LABEL: @xvmax_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmax.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4u64 xvmax_du(v4u64 _1, v4u64 _2) { return __lasx_xvmax_du(_1, _2); } ++// CHECK-LABEL: @xvmaxi_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu(<32 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32u8 xvmaxi_bu(v32u8 _1) { return __lasx_xvmaxi_bu(_1, 1); } ++// CHECK-LABEL: @xvmaxi_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu(<16 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16u16 xvmaxi_hu(v16u16 _1) { return __lasx_xvmaxi_hu(_1, 1); } ++// CHECK-LABEL: @xvmaxi_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu(<8 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8u32 xvmaxi_wu(v8u32 _1) { 
return __lasx_xvmaxi_wu(_1, 1); } ++// CHECK-LABEL: @xvmaxi_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmaxi.du(<4 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4u64 xvmaxi_du(v4u64 _1) { return __lasx_xvmaxi_du(_1, 1); } ++// CHECK-LABEL: @xvmin_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmin.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvmin_b(v32i8 _1, v32i8 _2) { return __lasx_xvmin_b(_1, _2); } ++// CHECK-LABEL: @xvmin_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmin.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvmin_h(v16i16 _1, v16i16 _2) { return __lasx_xvmin_h(_1, _2); } ++// CHECK-LABEL: @xvmin_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmin.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvmin_w(v8i32 _1, v8i32 _2) { return __lasx_xvmin_w(_1, _2); } ++// CHECK-LABEL: @xvmin_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmin.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvmin_d(v4i64 _1, v4i64 _2) { return __lasx_xvmin_d(_1, _2); } ++// CHECK-LABEL: @xvmini_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmini.b(<32 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvmini_b(v32i8 _1) { return __lasx_xvmini_b(_1, 1); } ++// CHECK-LABEL: @xvmini_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmini.h(<16 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvmini_h(v16i16 _1) { return __lasx_xvmini_h(_1, 1); } ++// CHECK-LABEL: @xvmini_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmini.w(<8 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvmini_w(v8i32 _1) { return __lasx_xvmini_w(_1, 1); } ++// CHECK-LABEL: @xvmini_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmini.d(<4 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvmini_d(v4i64 _1) { return __lasx_xvmini_d(_1, 1); } ++// CHECK-LABEL: @xvmin_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmin.bu(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32u8 xvmin_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmin_bu(_1, _2); } ++// CHECK-LABEL: @xvmin_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmin.hu(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16u16 xvmin_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmin_hu(_1, _2); } ++// CHECK-LABEL: @xvmin_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmin.wu(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8u32 xvmin_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmin_wu(_1, _2); } ++// CHECK-LABEL: @xvmin_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmin.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4u64 xvmin_du(v4u64 _1, v4u64 _2) { 
return __lasx_xvmin_du(_1, _2); } ++// CHECK-LABEL: @xvmini_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmini.bu(<32 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32u8 xvmini_bu(v32u8 _1) { return __lasx_xvmini_bu(_1, 1); } ++// CHECK-LABEL: @xvmini_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmini.hu(<16 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16u16 xvmini_hu(v16u16 _1) { return __lasx_xvmini_hu(_1, 1); } ++// CHECK-LABEL: @xvmini_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmini.wu(<8 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8u32 xvmini_wu(v8u32 _1) { return __lasx_xvmini_wu(_1, 1); } ++// CHECK-LABEL: @xvmini_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmini.du(<4 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4u64 xvmini_du(v4u64 _1) { return __lasx_xvmini_du(_1, 1); } ++// CHECK-LABEL: @xvseq_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvseq.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvseq_b(v32i8 _1, v32i8 _2) { return __lasx_xvseq_b(_1, _2); } ++// CHECK-LABEL: @xvseq_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvseq.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvseq_h(v16i16 _1, v16i16 _2) { return __lasx_xvseq_h(_1, _2); } ++// CHECK-LABEL: @xvseq_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvseq.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvseq_w(v8i32 _1, v8i32 _2) { return __lasx_xvseq_w(_1, _2); } ++// CHECK-LABEL: @xvseq_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvseq.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvseq_d(v4i64 _1, v4i64 _2) { return __lasx_xvseq_d(_1, _2); } ++// CHECK-LABEL: @xvseqi_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvseqi.b(<32 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvseqi_b(v32i8 _1) { return __lasx_xvseqi_b(_1, 1); } ++// CHECK-LABEL: @xvseqi_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvseqi.h(<16 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvseqi_h(v16i16 _1) { return __lasx_xvseqi_h(_1, 1); } ++// CHECK-LABEL: @xvseqi_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvseqi.w(<8 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvseqi_w(v8i32 _1) { return __lasx_xvseqi_w(_1, 1); } ++// CHECK-LABEL: @xvseqi_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvseqi.d(<4 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvseqi_d(v4i64 _1) { return __lasx_xvseqi_d(_1, 1); } ++// CHECK-LABEL: @xvslt_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvslt.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvslt_b(v32i8 _1, v32i8 _2) { return __lasx_xvslt_b(_1, _2); } ++// CHECK-LABEL: @xvslt_h( 
++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvslt.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvslt_h(v16i16 _1, v16i16 _2) { return __lasx_xvslt_h(_1, _2); } ++// CHECK-LABEL: @xvslt_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvslt.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvslt_w(v8i32 _1, v8i32 _2) { return __lasx_xvslt_w(_1, _2); } ++// CHECK-LABEL: @xvslt_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvslt.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvslt_d(v4i64 _1, v4i64 _2) { return __lasx_xvslt_d(_1, _2); } ++// CHECK-LABEL: @xvslti_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvslti.b(<32 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvslti_b(v32i8 _1) { return __lasx_xvslti_b(_1, 1); } ++// CHECK-LABEL: @xvslti_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvslti.h(<16 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvslti_h(v16i16 _1) { return __lasx_xvslti_h(_1, 1); } ++// CHECK-LABEL: @xvslti_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvslti.w(<8 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvslti_w(v8i32 _1) { return __lasx_xvslti_w(_1, 1); } ++// CHECK-LABEL: @xvslti_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvslti.d(<4 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvslti_d(v4i64 _1) { return __lasx_xvslti_d(_1, 1); } ++// CHECK-LABEL: @xvslt_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvslt.bu(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvslt_bu(v32u8 _1, v32u8 _2) { return __lasx_xvslt_bu(_1, _2); } ++// CHECK-LABEL: @xvslt_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvslt.hu(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvslt_hu(v16u16 _1, v16u16 _2) { return __lasx_xvslt_hu(_1, _2); } ++// CHECK-LABEL: @xvslt_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvslt.wu(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvslt_wu(v8u32 _1, v8u32 _2) { return __lasx_xvslt_wu(_1, _2); } ++// CHECK-LABEL: @xvslt_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvslt.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvslt_du(v4u64 _1, v4u64 _2) { return __lasx_xvslt_du(_1, _2); } ++// CHECK-LABEL: @xvslti_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvslti.bu(<32 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvslti_bu(v32u8 _1) { return __lasx_xvslti_bu(_1, 1); } ++// CHECK-LABEL: @xvslti_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvslti.hu(<16 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvslti_hu(v16u16 _1) { return __lasx_xvslti_hu(_1, 1); } ++// CHECK-LABEL: @xvslti_wu( ++// 
CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvslti.wu(<8 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvslti_wu(v8u32 _1) { return __lasx_xvslti_wu(_1, 1); } ++// CHECK-LABEL: @xvslti_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvslti.du(<4 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvslti_du(v4u64 _1) { return __lasx_xvslti_du(_1, 1); } ++// CHECK-LABEL: @xvsle_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsle.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvsle_b(v32i8 _1, v32i8 _2) { return __lasx_xvsle_b(_1, _2); } ++// CHECK-LABEL: @xvsle_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsle.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvsle_h(v16i16 _1, v16i16 _2) { return __lasx_xvsle_h(_1, _2); } ++// CHECK-LABEL: @xvsle_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsle.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvsle_w(v8i32 _1, v8i32 _2) { return __lasx_xvsle_w(_1, _2); } ++// CHECK-LABEL: @xvsle_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsle.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvsle_d(v4i64 _1, v4i64 _2) { return __lasx_xvsle_d(_1, _2); } ++// CHECK-LABEL: @xvslei_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvslei.b(<32 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvslei_b(v32i8 _1) { return __lasx_xvslei_b(_1, 1); } ++// CHECK-LABEL: @xvslei_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvslei.h(<16 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvslei_h(v16i16 _1) { return __lasx_xvslei_h(_1, 1); } ++// CHECK-LABEL: @xvslei_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvslei.w(<8 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvslei_w(v8i32 _1) { return __lasx_xvslei_w(_1, 1); } ++// CHECK-LABEL: @xvslei_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvslei.d(<4 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvslei_d(v4i64 _1) { return __lasx_xvslei_d(_1, 1); } ++// CHECK-LABEL: @xvsle_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsle.bu(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvsle_bu(v32u8 _1, v32u8 _2) { return __lasx_xvsle_bu(_1, _2); } ++// CHECK-LABEL: @xvsle_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsle.hu(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvsle_hu(v16u16 _1, v16u16 _2) { return __lasx_xvsle_hu(_1, _2); } ++// CHECK-LABEL: @xvsle_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsle.wu(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvsle_wu(v8u32 _1, v8u32 _2) { return __lasx_xvsle_wu(_1, _2); } ++// CHECK-LABEL: @xvsle_du( ++// CHECK-NEXT: entry: ++// 
CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsle.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvsle_du(v4u64 _1, v4u64 _2) { return __lasx_xvsle_du(_1, _2); } ++// CHECK-LABEL: @xvslei_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvslei.bu(<32 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvslei_bu(v32u8 _1) { return __lasx_xvslei_bu(_1, 1); } ++// CHECK-LABEL: @xvslei_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvslei.hu(<16 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvslei_hu(v16u16 _1) { return __lasx_xvslei_hu(_1, 1); } ++// CHECK-LABEL: @xvslei_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvslei.wu(<8 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvslei_wu(v8u32 _1) { return __lasx_xvslei_wu(_1, 1); } ++// CHECK-LABEL: @xvslei_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvslei.du(<4 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvslei_du(v4u64 _1) { return __lasx_xvslei_du(_1, 1); } ++// CHECK-LABEL: @xvsat_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsat.b(<32 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvsat_b(v32i8 _1) { return __lasx_xvsat_b(_1, 1); } ++// CHECK-LABEL: @xvsat_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsat.h(<16 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvsat_h(v16i16 _1) { return __lasx_xvsat_h(_1, 1); } ++// CHECK-LABEL: @xvsat_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsat.w(<8 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvsat_w(v8i32 _1) { return __lasx_xvsat_w(_1, 1); } ++// CHECK-LABEL: @xvsat_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsat.d(<4 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvsat_d(v4i64 _1) { return __lasx_xvsat_d(_1, 1); } ++// CHECK-LABEL: @xvsat_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsat.bu(<32 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32u8 xvsat_bu(v32u8 _1) { return __lasx_xvsat_bu(_1, 1); } ++// CHECK-LABEL: @xvsat_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsat.hu(<16 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16u16 xvsat_hu(v16u16 _1) { return __lasx_xvsat_hu(_1, 1); } ++// CHECK-LABEL: @xvsat_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsat.wu(<8 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8u32 xvsat_wu(v8u32 _1) { return __lasx_xvsat_wu(_1, 1); } ++// CHECK-LABEL: @xvsat_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsat.du(<4 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4u64 xvsat_du(v4u64 _1) { return __lasx_xvsat_du(_1, 1); } ++// CHECK-LABEL: @xvadda_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvadda.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// 
++v32i8 xvadda_b(v32i8 _1, v32i8 _2) { return __lasx_xvadda_b(_1, _2); } ++// CHECK-LABEL: @xvadda_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvadda.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvadda_h(v16i16 _1, v16i16 _2) { return __lasx_xvadda_h(_1, _2); } ++// CHECK-LABEL: @xvadda_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvadda.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvadda_w(v8i32 _1, v8i32 _2) { return __lasx_xvadda_w(_1, _2); } ++// CHECK-LABEL: @xvadda_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvadda.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvadda_d(v4i64 _1, v4i64 _2) { return __lasx_xvadda_d(_1, _2); } ++// CHECK-LABEL: @xvsadd_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsadd.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvsadd_b(v32i8 _1, v32i8 _2) { return __lasx_xvsadd_b(_1, _2); } ++// CHECK-LABEL: @xvsadd_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsadd.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvsadd_h(v16i16 _1, v16i16 _2) { return __lasx_xvsadd_h(_1, _2); } ++// CHECK-LABEL: @xvsadd_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsadd.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvsadd_w(v8i32 _1, v8i32 _2) { return __lasx_xvsadd_w(_1, _2); } ++// CHECK-LABEL: @xvsadd_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsadd.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvsadd_d(v4i64 _1, v4i64 _2) { return __lasx_xvsadd_d(_1, _2); } ++// CHECK-LABEL: @xvsadd_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsadd.bu(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32u8 xvsadd_bu(v32u8 _1, v32u8 _2) { return __lasx_xvsadd_bu(_1, _2); } ++// CHECK-LABEL: @xvsadd_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsadd.hu(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16u16 xvsadd_hu(v16u16 _1, v16u16 _2) { return __lasx_xvsadd_hu(_1, _2); } ++// CHECK-LABEL: @xvsadd_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsadd.wu(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8u32 xvsadd_wu(v8u32 _1, v8u32 _2) { return __lasx_xvsadd_wu(_1, _2); } ++// CHECK-LABEL: @xvsadd_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsadd.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4u64 xvsadd_du(v4u64 _1, v4u64 _2) { return __lasx_xvsadd_du(_1, _2); } ++// CHECK-LABEL: @xvavg_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvavg.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvavg_b(v32i8 _1, v32i8 _2) { return __lasx_xvavg_b(_1, _2); } ++// CHECK-LABEL: @xvavg_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: 
TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvavg.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvavg_h(v16i16 _1, v16i16 _2) { return __lasx_xvavg_h(_1, _2); } ++// CHECK-LABEL: @xvavg_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvavg.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvavg_w(v8i32 _1, v8i32 _2) { return __lasx_xvavg_w(_1, _2); } ++// CHECK-LABEL: @xvavg_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvavg.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvavg_d(v4i64 _1, v4i64 _2) { return __lasx_xvavg_d(_1, _2); } ++// CHECK-LABEL: @xvavg_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvavg.bu(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32u8 xvavg_bu(v32u8 _1, v32u8 _2) { return __lasx_xvavg_bu(_1, _2); } ++// CHECK-LABEL: @xvavg_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvavg.hu(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16u16 xvavg_hu(v16u16 _1, v16u16 _2) { return __lasx_xvavg_hu(_1, _2); } ++// CHECK-LABEL: @xvavg_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvavg.wu(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8u32 xvavg_wu(v8u32 _1, v8u32 _2) { return __lasx_xvavg_wu(_1, _2); } ++// CHECK-LABEL: @xvavg_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvavg.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4u64 xvavg_du(v4u64 _1, v4u64 _2) { return __lasx_xvavg_du(_1, _2); } ++// CHECK-LABEL: @xvavgr_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvavgr.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvavgr_b(v32i8 _1, v32i8 _2) { return __lasx_xvavgr_b(_1, _2); } ++// CHECK-LABEL: @xvavgr_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvavgr.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvavgr_h(v16i16 _1, v16i16 _2) { return __lasx_xvavgr_h(_1, _2); } ++// CHECK-LABEL: @xvavgr_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvavgr.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvavgr_w(v8i32 _1, v8i32 _2) { return __lasx_xvavgr_w(_1, _2); } ++// CHECK-LABEL: @xvavgr_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvavgr.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvavgr_d(v4i64 _1, v4i64 _2) { return __lasx_xvavgr_d(_1, _2); } ++// CHECK-LABEL: @xvavgr_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvavgr.bu(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32u8 xvavgr_bu(v32u8 _1, v32u8 _2) { return __lasx_xvavgr_bu(_1, _2); } ++// CHECK-LABEL: @xvavgr_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvavgr.hu(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16u16 
xvavgr_hu(v16u16 _1, v16u16 _2) { return __lasx_xvavgr_hu(_1, _2); } ++// CHECK-LABEL: @xvavgr_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvavgr.wu(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8u32 xvavgr_wu(v8u32 _1, v8u32 _2) { return __lasx_xvavgr_wu(_1, _2); } ++// CHECK-LABEL: @xvavgr_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvavgr.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4u64 xvavgr_du(v4u64 _1, v4u64 _2) { return __lasx_xvavgr_du(_1, _2); } ++// CHECK-LABEL: @xvssub_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssub.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvssub_b(v32i8 _1, v32i8 _2) { return __lasx_xvssub_b(_1, _2); } ++// CHECK-LABEL: @xvssub_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssub.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvssub_h(v16i16 _1, v16i16 _2) { return __lasx_xvssub_h(_1, _2); } ++// CHECK-LABEL: @xvssub_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssub.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvssub_w(v8i32 _1, v8i32 _2) { return __lasx_xvssub_w(_1, _2); } ++// CHECK-LABEL: @xvssub_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvssub.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvssub_d(v4i64 _1, v4i64 _2) { return __lasx_xvssub_d(_1, _2); } ++// CHECK-LABEL: @xvssub_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssub.bu(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32u8 xvssub_bu(v32u8 _1, v32u8 _2) { return __lasx_xvssub_bu(_1, _2); } ++// CHECK-LABEL: @xvssub_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssub.hu(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16u16 xvssub_hu(v16u16 _1, v16u16 _2) { return __lasx_xvssub_hu(_1, _2); } ++// CHECK-LABEL: @xvssub_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssub.wu(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8u32 xvssub_wu(v8u32 _1, v8u32 _2) { return __lasx_xvssub_wu(_1, _2); } ++// CHECK-LABEL: @xvssub_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvssub.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4u64 xvssub_du(v4u64 _1, v4u64 _2) { return __lasx_xvssub_du(_1, _2); } ++// CHECK-LABEL: @xvabsd_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvabsd.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvabsd_b(v32i8 _1, v32i8 _2) { return __lasx_xvabsd_b(_1, _2); } ++// CHECK-LABEL: @xvabsd_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvabsd.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvabsd_h(v16i16 _1, v16i16 _2) { return __lasx_xvabsd_h(_1, _2); } ++// CHECK-LABEL: @xvabsd_w( ++// CHECK-NEXT: entry: ++// 
CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvabsd.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvabsd_w(v8i32 _1, v8i32 _2) { return __lasx_xvabsd_w(_1, _2); } ++// CHECK-LABEL: @xvabsd_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvabsd.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvabsd_d(v4i64 _1, v4i64 _2) { return __lasx_xvabsd_d(_1, _2); } ++// CHECK-LABEL: @xvabsd_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvabsd.bu(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32u8 xvabsd_bu(v32u8 _1, v32u8 _2) { return __lasx_xvabsd_bu(_1, _2); } ++// CHECK-LABEL: @xvabsd_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvabsd.hu(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16u16 xvabsd_hu(v16u16 _1, v16u16 _2) { return __lasx_xvabsd_hu(_1, _2); } ++// CHECK-LABEL: @xvabsd_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvabsd.wu(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8u32 xvabsd_wu(v8u32 _1, v8u32 _2) { return __lasx_xvabsd_wu(_1, _2); } ++// CHECK-LABEL: @xvabsd_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvabsd.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4u64 xvabsd_du(v4u64 _1, v4u64 _2) { return __lasx_xvabsd_du(_1, _2); } ++// CHECK-LABEL: @xvmul_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmul.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvmul_b(v32i8 _1, v32i8 _2) { return __lasx_xvmul_b(_1, _2); } ++// CHECK-LABEL: @xvmul_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmul.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvmul_h(v16i16 _1, v16i16 _2) { return __lasx_xvmul_h(_1, _2); } ++// CHECK-LABEL: @xvmul_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmul.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvmul_w(v8i32 _1, v8i32 _2) { return __lasx_xvmul_w(_1, _2); } ++// CHECK-LABEL: @xvmul_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmul.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvmul_d(v4i64 _1, v4i64 _2) { return __lasx_xvmul_d(_1, _2); } ++// CHECK-LABEL: @xvmadd_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmadd.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*, <32 x i8> _3:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvmadd_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __lasx_xvmadd_b(_1, _2, _3); } ++// CHECK-LABEL: @xvmadd_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmadd.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*, <16 x i16> _3:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvmadd_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __lasx_xvmadd_h(_1, _2, _3); } ++// CHECK-LABEL: @xvmadd_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmadd.w(<8 x i32> _1:%.*, <8 x 
i32> _2:%.*, <8 x i32> _3:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvmadd_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __lasx_xvmadd_w(_1, _2, _3); } ++// CHECK-LABEL: @xvmadd_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmadd.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*, <4 x i64> _3:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvmadd_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __lasx_xvmadd_d(_1, _2, _3); } ++// CHECK-LABEL: @xvmsub_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmsub.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*, <32 x i8> _3:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvmsub_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __lasx_xvmsub_b(_1, _2, _3); } ++// CHECK-LABEL: @xvmsub_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmsub.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*, <16 x i16> _3:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvmsub_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __lasx_xvmsub_h(_1, _2, _3); } ++// CHECK-LABEL: @xvmsub_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmsub.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*, <8 x i32> _3:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvmsub_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __lasx_xvmsub_w(_1, _2, _3); } ++// CHECK-LABEL: @xvmsub_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmsub.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*, <4 x i64> _3:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvmsub_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __lasx_xvmsub_d(_1, _2, _3); } ++// CHECK-LABEL: @xvdiv_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvdiv.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvdiv_b(v32i8 _1, v32i8 _2) { return __lasx_xvdiv_b(_1, _2); } ++// CHECK-LABEL: @xvdiv_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvdiv.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvdiv_h(v16i16 _1, v16i16 _2) { return __lasx_xvdiv_h(_1, _2); } ++// CHECK-LABEL: @xvdiv_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvdiv.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvdiv_w(v8i32 _1, v8i32 _2) { return __lasx_xvdiv_w(_1, _2); } ++// CHECK-LABEL: @xvdiv_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvdiv.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvdiv_d(v4i64 _1, v4i64 _2) { return __lasx_xvdiv_d(_1, _2); } ++// CHECK-LABEL: @xvdiv_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvdiv.bu(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32u8 xvdiv_bu(v32u8 _1, v32u8 _2) { return __lasx_xvdiv_bu(_1, _2); } ++// CHECK-LABEL: @xvdiv_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvdiv.hu(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16u16 xvdiv_hu(v16u16 _1, v16u16 _2) { return __lasx_xvdiv_hu(_1, _2); } ++// CHECK-LABEL: @xvdiv_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvdiv.wu(<8 x 
i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8u32 xvdiv_wu(v8u32 _1, v8u32 _2) { return __lasx_xvdiv_wu(_1, _2); } ++// CHECK-LABEL: @xvdiv_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvdiv.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4u64 xvdiv_du(v4u64 _1, v4u64 _2) { return __lasx_xvdiv_du(_1, _2); } ++// CHECK-LABEL: @xvhaddw_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvhaddw.h.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvhaddw_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvhaddw_h_b(_1, _2); } ++// CHECK-LABEL: @xvhaddw_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvhaddw.w.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvhaddw_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvhaddw_w_h(_1, _2); } ++// CHECK-LABEL: @xvhaddw_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.d.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvhaddw_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvhaddw_d_w(_1, _2); } ++// CHECK-LABEL: @xvhaddw_hu_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvhaddw.hu.bu(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16u16 xvhaddw_hu_bu(v32u8 _1, v32u8 _2) { return __lasx_xvhaddw_hu_bu(_1, _2); } ++// CHECK-LABEL: @xvhaddw_wu_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvhaddw.wu.hu(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8u32 xvhaddw_wu_hu(v16u16 _1, v16u16 _2) { return __lasx_xvhaddw_wu_hu(_1, _2); } ++// CHECK-LABEL: @xvhaddw_du_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.du.wu(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4u64 xvhaddw_du_wu(v8u32 _1, v8u32 _2) { return __lasx_xvhaddw_du_wu(_1, _2); } ++// CHECK-LABEL: @xvhsubw_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvhsubw.h.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvhsubw_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvhsubw_h_b(_1, _2); } ++// CHECK-LABEL: @xvhsubw_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvhsubw.w.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvhsubw_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvhsubw_w_h(_1, _2); } ++// CHECK-LABEL: @xvhsubw_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.d.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvhsubw_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvhsubw_d_w(_1, _2); } ++// CHECK-LABEL: @xvhsubw_hu_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvhsubw.hu.bu(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvhsubw_hu_bu(v32u8 _1, v32u8 _2) { return __lasx_xvhsubw_hu_bu(_1, _2); } ++// CHECK-LABEL: @xvhsubw_wu_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> 
@llvm.loongarch.lasx.xvhsubw.wu.hu(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvhsubw_wu_hu(v16u16 _1, v16u16 _2) { return __lasx_xvhsubw_wu_hu(_1, _2); } ++// CHECK-LABEL: @xvhsubw_du_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.du.wu(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvhsubw_du_wu(v8u32 _1, v8u32 _2) { return __lasx_xvhsubw_du_wu(_1, _2); } ++// CHECK-LABEL: @xvmod_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmod.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvmod_b(v32i8 _1, v32i8 _2) { return __lasx_xvmod_b(_1, _2); } ++// CHECK-LABEL: @xvmod_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmod.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvmod_h(v16i16 _1, v16i16 _2) { return __lasx_xvmod_h(_1, _2); } ++// CHECK-LABEL: @xvmod_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmod.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvmod_w(v8i32 _1, v8i32 _2) { return __lasx_xvmod_w(_1, _2); } ++// CHECK-LABEL: @xvmod_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmod.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvmod_d(v4i64 _1, v4i64 _2) { return __lasx_xvmod_d(_1, _2); } ++// CHECK-LABEL: @xvmod_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmod.bu(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32u8 xvmod_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmod_bu(_1, _2); } ++// CHECK-LABEL: @xvmod_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmod.hu(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16u16 xvmod_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmod_hu(_1, _2); } ++// CHECK-LABEL: @xvmod_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmod.wu(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8u32 xvmod_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmod_wu(_1, _2); } ++// CHECK-LABEL: @xvmod_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmod.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4u64 xvmod_du(v4u64 _1, v4u64 _2) { return __lasx_xvmod_du(_1, _2); } ++// CHECK-LABEL: @xvrepl128vei_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b(<32 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvrepl128vei_b(v32i8 _1) { return __lasx_xvrepl128vei_b(_1, 1); } ++// CHECK-LABEL: @xvrepl128vei_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h(<16 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvrepl128vei_h(v16i16 _1) { return __lasx_xvrepl128vei_h(_1, 1); } ++// CHECK-LABEL: @xvrepl128vei_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w(<8 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvrepl128vei_w(v8i32 
_1) { return __lasx_xvrepl128vei_w(_1, 1); } ++// CHECK-LABEL: @xvrepl128vei_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d(<4 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvrepl128vei_d(v4i64 _1) { return __lasx_xvrepl128vei_d(_1, 1); } ++// CHECK-LABEL: @xvpickev_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvpickev.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvpickev_b(v32i8 _1, v32i8 _2) { return __lasx_xvpickev_b(_1, _2); } ++// CHECK-LABEL: @xvpickev_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvpickev.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvpickev_h(v16i16 _1, v16i16 _2) { return __lasx_xvpickev_h(_1, _2); } ++// CHECK-LABEL: @xvpickev_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvpickev.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvpickev_w(v8i32 _1, v8i32 _2) { return __lasx_xvpickev_w(_1, _2); } ++// CHECK-LABEL: @xvpickev_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvpickev.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvpickev_d(v4i64 _1, v4i64 _2) { return __lasx_xvpickev_d(_1, _2); } ++// CHECK-LABEL: @xvpickod_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvpickod.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvpickod_b(v32i8 _1, v32i8 _2) { return __lasx_xvpickod_b(_1, _2); } ++// CHECK-LABEL: @xvpickod_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvpickod.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvpickod_h(v16i16 _1, v16i16 _2) { return __lasx_xvpickod_h(_1, _2); } ++// CHECK-LABEL: @xvpickod_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvpickod.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvpickod_w(v8i32 _1, v8i32 _2) { return __lasx_xvpickod_w(_1, _2); } ++// CHECK-LABEL: @xvpickod_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvpickod.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvpickod_d(v4i64 _1, v4i64 _2) { return __lasx_xvpickod_d(_1, _2); } ++// CHECK-LABEL: @xvilvh_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvilvh.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvilvh_b(v32i8 _1, v32i8 _2) { return __lasx_xvilvh_b(_1, _2); } ++// CHECK-LABEL: @xvilvh_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvilvh.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvilvh_h(v16i16 _1, v16i16 _2) { return __lasx_xvilvh_h(_1, _2); } ++// CHECK-LABEL: @xvilvh_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvilvh.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvilvh_w(v8i32 _1, v8i32 _2) { return __lasx_xvilvh_w(_1, _2); } ++// CHECK-LABEL: @xvilvh_d( ++// CHECK-NEXT: 
entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvilvh.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvilvh_d(v4i64 _1, v4i64 _2) { return __lasx_xvilvh_d(_1, _2); } ++// CHECK-LABEL: @xvilvl_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvilvl.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvilvl_b(v32i8 _1, v32i8 _2) { return __lasx_xvilvl_b(_1, _2); } ++// CHECK-LABEL: @xvilvl_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvilvl.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvilvl_h(v16i16 _1, v16i16 _2) { return __lasx_xvilvl_h(_1, _2); } ++// CHECK-LABEL: @xvilvl_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvilvl.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvilvl_w(v8i32 _1, v8i32 _2) { return __lasx_xvilvl_w(_1, _2); } ++// CHECK-LABEL: @xvilvl_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvilvl.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvilvl_d(v4i64 _1, v4i64 _2) { return __lasx_xvilvl_d(_1, _2); } ++// CHECK-LABEL: @xvpackev_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvpackev.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvpackev_b(v32i8 _1, v32i8 _2) { return __lasx_xvpackev_b(_1, _2); } ++// CHECK-LABEL: @xvpackev_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvpackev.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvpackev_h(v16i16 _1, v16i16 _2) { return __lasx_xvpackev_h(_1, _2); } ++// CHECK-LABEL: @xvpackev_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvpackev.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvpackev_w(v8i32 _1, v8i32 _2) { return __lasx_xvpackev_w(_1, _2); } ++// CHECK-LABEL: @xvpackev_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvpackev.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvpackev_d(v4i64 _1, v4i64 _2) { return __lasx_xvpackev_d(_1, _2); } ++// CHECK-LABEL: @xvpackod_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvpackod.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvpackod_b(v32i8 _1, v32i8 _2) { return __lasx_xvpackod_b(_1, _2); } ++// CHECK-LABEL: @xvpackod_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvpackod.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvpackod_h(v16i16 _1, v16i16 _2) { return __lasx_xvpackod_h(_1, _2); } ++// CHECK-LABEL: @xvpackod_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvpackod.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvpackod_w(v8i32 _1, v8i32 _2) { return __lasx_xvpackod_w(_1, _2); } ++// CHECK-LABEL: @xvpackod_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvpackod.d(<4 x i64> 
_1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvpackod_d(v4i64 _1, v4i64 _2) { return __lasx_xvpackod_d(_1, _2); } ++// CHECK-LABEL: @xvshuf_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvshuf.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*, <32 x i8> _3:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvshuf_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __lasx_xvshuf_b(_1, _2, _3); } ++// CHECK-LABEL: @xvshuf_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvshuf.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*, <16 x i16> _3:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvshuf_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __lasx_xvshuf_h(_1, _2, _3); } ++// CHECK-LABEL: @xvshuf_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvshuf.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*, <8 x i32> _3:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvshuf_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __lasx_xvshuf_w(_1, _2, _3); } ++// CHECK-LABEL: @xvshuf_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvshuf.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*, <4 x i64> _3:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvshuf_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __lasx_xvshuf_d(_1, _2, _3); } ++// CHECK-LABEL: @xvand_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvand.v(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32u8 xvand_v(v32u8 _1, v32u8 _2) { return __lasx_xvand_v(_1, _2); } ++// CHECK-LABEL: @xvandi_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvandi.b(<32 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32u8 xvandi_b(v32u8 _1) { return __lasx_xvandi_b(_1, 1); } ++// CHECK-LABEL: @xvor_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvor.v(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32u8 xvor_v(v32u8 _1, v32u8 _2) { return __lasx_xvor_v(_1, _2); } ++// CHECK-LABEL: @xvori_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvori.b(<32 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32u8 xvori_b(v32u8 _1) { return __lasx_xvori_b(_1, 1); } ++// CHECK-LABEL: @xvnor_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvnor.v(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32u8 xvnor_v(v32u8 _1, v32u8 _2) { return __lasx_xvnor_v(_1, _2); } ++// CHECK-LABEL: @xvnori_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvnori.b(<32 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32u8 xvnori_b(v32u8 _1) { return __lasx_xvnori_b(_1, 1); } ++// CHECK-LABEL: @xvxor_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvxor.v(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32u8 xvxor_v(v32u8 _1, v32u8 _2) { return __lasx_xvxor_v(_1, _2); } ++// CHECK-LABEL: @xvxori_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvxori.b(<32 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32u8 xvxori_b(v32u8 _1) { return __lasx_xvxori_b(_1, 1); } ++// 
CHECK-LABEL: @xvbitsel_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvbitsel.v(<32 x i8> _1:%.*, <32 x i8> _2:%.*, <32 x i8> _3:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32u8 xvbitsel_v(v32u8 _1, v32u8 _2, v32u8 _3) { return __lasx_xvbitsel_v(_1, _2, _3); } ++// CHECK-LABEL: @xvbitseli_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvbitseli.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32u8 xvbitseli_b(v32u8 _1, v32u8 _2) { return __lasx_xvbitseli_b(_1, _2, 1); } ++// CHECK-LABEL: @xvshuf4i_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b(<32 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvshuf4i_b(v32i8 _1) { return __lasx_xvshuf4i_b(_1, 1); } ++// CHECK-LABEL: @xvshuf4i_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h(<16 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvshuf4i_h(v16i16 _1) { return __lasx_xvshuf4i_h(_1, 1); } ++// CHECK-LABEL: @xvshuf4i_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w(<8 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvshuf4i_w(v8i32 _1) { return __lasx_xvshuf4i_w(_1, 1); } ++// CHECK-LABEL: @xvreplgr2vr_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvreplgr2vr.b(i32 _1:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvreplgr2vr_b(int _1) { return __lasx_xvreplgr2vr_b(_1); } ++// CHECK-LABEL: @xvreplgr2vr_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvreplgr2vr.h(i32 _1:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvreplgr2vr_h(int _1) { return __lasx_xvreplgr2vr_h(_1); } ++// CHECK-LABEL: @xvreplgr2vr_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvreplgr2vr.w(i32 _1:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvreplgr2vr_w(int _1) { return __lasx_xvreplgr2vr_w(_1); } ++// CHECK-LABEL: @xvreplgr2vr_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: CONV:%.* = sext i32 _1:%.* to i64 ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvreplgr2vr.d(i64 CONV) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvreplgr2vr_d(int _1) { return __lasx_xvreplgr2vr_d(_1); } ++// CHECK-LABEL: @xvpcnt_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvpcnt.b(<32 x i8> _1:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvpcnt_b(v32i8 _1) { return __lasx_xvpcnt_b(_1); } ++// CHECK-LABEL: @xvpcnt_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvpcnt.h(<16 x i16> _1:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvpcnt_h(v16i16 _1) { return __lasx_xvpcnt_h(_1); } ++// CHECK-LABEL: @xvpcnt_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvpcnt.w(<8 x i32> _1:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvpcnt_w(v8i32 _1) { return __lasx_xvpcnt_w(_1); } ++// CHECK-LABEL: @xvpcnt_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvpcnt.d(<4 x i64> _1:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvpcnt_d(v4i64 _1) { return __lasx_xvpcnt_d(_1); } ++// CHECK-LABEL: @xvclo_b( ++// 
CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvclo.b(<32 x i8> _1:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvclo_b(v32i8 _1) { return __lasx_xvclo_b(_1); } ++// CHECK-LABEL: @xvclo_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvclo.h(<16 x i16> _1:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvclo_h(v16i16 _1) { return __lasx_xvclo_h(_1); } ++// CHECK-LABEL: @xvclo_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvclo.w(<8 x i32> _1:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvclo_w(v8i32 _1) { return __lasx_xvclo_w(_1); } ++// CHECK-LABEL: @xvclo_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvclo.d(<4 x i64> _1:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvclo_d(v4i64 _1) { return __lasx_xvclo_d(_1); } ++// CHECK-LABEL: @xvclz_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvclz.b(<32 x i8> _1:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvclz_b(v32i8 _1) { return __lasx_xvclz_b(_1); } ++// CHECK-LABEL: @xvclz_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvclz.h(<16 x i16> _1:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvclz_h(v16i16 _1) { return __lasx_xvclz_h(_1); } ++// CHECK-LABEL: @xvclz_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvclz.w(<8 x i32> _1:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvclz_w(v8i32 _1) { return __lasx_xvclz_w(_1); } ++// CHECK-LABEL: @xvclz_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvclz.d(<4 x i64> _1:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvclz_d(v4i64 _1) { return __lasx_xvclz_d(_1); } ++// CHECK-LABEL: @xvfadd_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfadd.s(<8 x float> _1:%.*, <8 x float> _2:%.*) ++// CHECK-NEXT: ret <8 x float> TMP0 ++// ++v8f32 xvfadd_s(v8f32 _1, v8f32 _2) { return __lasx_xvfadd_s(_1, _2); } ++// CHECK-LABEL: @xvfadd_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfadd.d(<4 x double> _1:%.*, <4 x double> _2:%.*) ++// CHECK-NEXT: ret <4 x double> TMP0 ++// ++v4f64 xvfadd_d(v4f64 _1, v4f64 _2) { return __lasx_xvfadd_d(_1, _2); } ++// CHECK-LABEL: @xvfsub_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfsub.s(<8 x float> _1:%.*, <8 x float> _2:%.*) ++// CHECK-NEXT: ret <8 x float> TMP0 ++// ++v8f32 xvfsub_s(v8f32 _1, v8f32 _2) { return __lasx_xvfsub_s(_1, _2); } ++// CHECK-LABEL: @xvfsub_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfsub.d(<4 x double> _1:%.*, <4 x double> _2:%.*) ++// CHECK-NEXT: ret <4 x double> TMP0 ++// ++v4f64 xvfsub_d(v4f64 _1, v4f64 _2) { return __lasx_xvfsub_d(_1, _2); } ++// CHECK-LABEL: @xvfmul_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfmul.s(<8 x float> _1:%.*, <8 x float> _2:%.*) ++// CHECK-NEXT: ret <8 x float> TMP0 ++// ++v8f32 xvfmul_s(v8f32 _1, v8f32 _2) { return __lasx_xvfmul_s(_1, _2); } ++// CHECK-LABEL: @xvfmul_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfmul.d(<4 x double> _1:%.*, <4 x 
double> _2:%.*) ++// CHECK-NEXT: ret <4 x double> TMP0 ++// ++v4f64 xvfmul_d(v4f64 _1, v4f64 _2) { return __lasx_xvfmul_d(_1, _2); } ++// CHECK-LABEL: @xvfdiv_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfdiv.s(<8 x float> _1:%.*, <8 x float> _2:%.*) ++// CHECK-NEXT: ret <8 x float> TMP0 ++// ++v8f32 xvfdiv_s(v8f32 _1, v8f32 _2) { return __lasx_xvfdiv_s(_1, _2); } ++// CHECK-LABEL: @xvfdiv_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfdiv.d(<4 x double> _1:%.*, <4 x double> _2:%.*) ++// CHECK-NEXT: ret <4 x double> TMP0 ++// ++v4f64 xvfdiv_d(v4f64 _1, v4f64 _2) { return __lasx_xvfdiv_d(_1, _2); } ++// CHECK-LABEL: @xvfcvt_h_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvfcvt.h.s(<8 x float> _1:%.*, <8 x float> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvfcvt_h_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcvt_h_s(_1, _2); } ++// CHECK-LABEL: @xvfcvt_s_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfcvt.s.d(<4 x double> _1:%.*, <4 x double> _2:%.*) ++// CHECK-NEXT: ret <8 x float> TMP0 ++// ++v8f32 xvfcvt_s_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcvt_s_d(_1, _2); } ++// CHECK-LABEL: @xvfmin_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfmin.s(<8 x float> _1:%.*, <8 x float> _2:%.*) ++// CHECK-NEXT: ret <8 x float> TMP0 ++// ++v8f32 xvfmin_s(v8f32 _1, v8f32 _2) { return __lasx_xvfmin_s(_1, _2); } ++// CHECK-LABEL: @xvfmin_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfmin.d(<4 x double> _1:%.*, <4 x double> _2:%.*) ++// CHECK-NEXT: ret <4 x double> TMP0 ++// ++v4f64 xvfmin_d(v4f64 _1, v4f64 _2) { return __lasx_xvfmin_d(_1, _2); } ++// CHECK-LABEL: @xvfmina_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfmina.s(<8 x float> _1:%.*, <8 x float> _2:%.*) ++// CHECK-NEXT: ret <8 x float> TMP0 ++// ++v8f32 xvfmina_s(v8f32 _1, v8f32 _2) { return __lasx_xvfmina_s(_1, _2); } ++// CHECK-LABEL: @xvfmina_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfmina.d(<4 x double> _1:%.*, <4 x double> _2:%.*) ++// CHECK-NEXT: ret <4 x double> TMP0 ++// ++v4f64 xvfmina_d(v4f64 _1, v4f64 _2) { return __lasx_xvfmina_d(_1, _2); } ++// CHECK-LABEL: @xvfmax_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfmax.s(<8 x float> _1:%.*, <8 x float> _2:%.*) ++// CHECK-NEXT: ret <8 x float> TMP0 ++// ++v8f32 xvfmax_s(v8f32 _1, v8f32 _2) { return __lasx_xvfmax_s(_1, _2); } ++// CHECK-LABEL: @xvfmax_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfmax.d(<4 x double> _1:%.*, <4 x double> _2:%.*) ++// CHECK-NEXT: ret <4 x double> TMP0 ++// ++v4f64 xvfmax_d(v4f64 _1, v4f64 _2) { return __lasx_xvfmax_d(_1, _2); } ++// CHECK-LABEL: @xvfmaxa_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfmaxa.s(<8 x float> _1:%.*, <8 x float> _2:%.*) ++// CHECK-NEXT: ret <8 x float> TMP0 ++// ++v8f32 xvfmaxa_s(v8f32 _1, v8f32 _2) { return __lasx_xvfmaxa_s(_1, _2); } ++// CHECK-LABEL: @xvfmaxa_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfmaxa.d(<4 x double> _1:%.*, <4 x double> _2:%.*) ++// CHECK-NEXT: 
ret <4 x double> TMP0 ++// ++v4f64 xvfmaxa_d(v4f64 _1, v4f64 _2) { return __lasx_xvfmaxa_d(_1, _2); } ++// CHECK-LABEL: @xvfclass_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfclass.s(<8 x float> _1:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvfclass_s(v8f32 _1) { return __lasx_xvfclass_s(_1); } ++// CHECK-LABEL: @xvfclass_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfclass.d(<4 x double> _1:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvfclass_d(v4f64 _1) { return __lasx_xvfclass_d(_1); } ++// CHECK-LABEL: @xvfsqrt_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfsqrt.s(<8 x float> _1:%.*) ++// CHECK-NEXT: ret <8 x float> TMP0 ++// ++v8f32 xvfsqrt_s(v8f32 _1) { return __lasx_xvfsqrt_s(_1); } ++// CHECK-LABEL: @xvfsqrt_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfsqrt.d(<4 x double> _1:%.*) ++// CHECK-NEXT: ret <4 x double> TMP0 ++// ++v4f64 xvfsqrt_d(v4f64 _1) { return __lasx_xvfsqrt_d(_1); } ++// CHECK-LABEL: @xvfrecip_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfrecip.s(<8 x float> _1:%.*) ++// CHECK-NEXT: ret <8 x float> TMP0 ++// ++v8f32 xvfrecip_s(v8f32 _1) { return __lasx_xvfrecip_s(_1); } ++// CHECK-LABEL: @xvfrecip_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfrecip.d(<4 x double> _1:%.*) ++// CHECK-NEXT: ret <4 x double> TMP0 ++// ++v4f64 xvfrecip_d(v4f64 _1) { return __lasx_xvfrecip_d(_1); } ++// CHECK-LABEL: @xvfrint_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfrint.s(<8 x float> _1:%.*) ++// CHECK-NEXT: ret <8 x float> TMP0 ++// ++v8f32 xvfrint_s(v8f32 _1) { return __lasx_xvfrint_s(_1); } ++// CHECK-LABEL: @xvfrint_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfrint.d(<4 x double> _1:%.*) ++// CHECK-NEXT: ret <4 x double> TMP0 ++// ++v4f64 xvfrint_d(v4f64 _1) { return __lasx_xvfrint_d(_1); } ++// CHECK-LABEL: @xvfrsqrt_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfrsqrt.s(<8 x float> _1:%.*) ++// CHECK-NEXT: ret <8 x float> TMP0 ++// ++v8f32 xvfrsqrt_s(v8f32 _1) { return __lasx_xvfrsqrt_s(_1); } ++// CHECK-LABEL: @xvfrsqrt_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfrsqrt.d(<4 x double> _1:%.*) ++// CHECK-NEXT: ret <4 x double> TMP0 ++// ++v4f64 xvfrsqrt_d(v4f64 _1) { return __lasx_xvfrsqrt_d(_1); } ++// CHECK-LABEL: @xvflogb_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvflogb.s(<8 x float> _1:%.*) ++// CHECK-NEXT: ret <8 x float> TMP0 ++// ++v8f32 xvflogb_s(v8f32 _1) { return __lasx_xvflogb_s(_1); } ++// CHECK-LABEL: @xvflogb_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvflogb.d(<4 x double> _1:%.*) ++// CHECK-NEXT: ret <4 x double> TMP0 ++// ++v4f64 xvflogb_d(v4f64 _1) { return __lasx_xvflogb_d(_1); } ++// CHECK-LABEL: @xvfcvth_s_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfcvth.s.h(<16 x i16> _1:%.*) ++// CHECK-NEXT: ret <8 x float> TMP0 ++// ++v8f32 xvfcvth_s_h(v16i16 _1) { return __lasx_xvfcvth_s_h(_1); } ++// CHECK-LABEL: @xvfcvth_d_s( ++// 
CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfcvth.d.s(<8 x float> _1:%.*) ++// CHECK-NEXT: ret <4 x double> TMP0 ++// ++v4f64 xvfcvth_d_s(v8f32 _1) { return __lasx_xvfcvth_d_s(_1); } ++// CHECK-LABEL: @xvfcvtl_s_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfcvtl.s.h(<16 x i16> _1:%.*) ++// CHECK-NEXT: ret <8 x float> TMP0 ++// ++v8f32 xvfcvtl_s_h(v16i16 _1) { return __lasx_xvfcvtl_s_h(_1); } ++// CHECK-LABEL: @xvfcvtl_d_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfcvtl.d.s(<8 x float> _1:%.*) ++// CHECK-NEXT: ret <4 x double> TMP0 ++// ++v4f64 xvfcvtl_d_s(v8f32 _1) { return __lasx_xvfcvtl_d_s(_1); } ++// CHECK-LABEL: @xvftint_w_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.w.s(<8 x float> _1:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvftint_w_s(v8f32 _1) { return __lasx_xvftint_w_s(_1); } ++// CHECK-LABEL: @xvftint_l_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftint.l.d(<4 x double> _1:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvftint_l_d(v4f64 _1) { return __lasx_xvftint_l_d(_1); } ++// CHECK-LABEL: @xvftint_wu_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.wu.s(<8 x float> _1:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8u32 xvftint_wu_s(v8f32 _1) { return __lasx_xvftint_wu_s(_1); } ++// CHECK-LABEL: @xvftint_lu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftint.lu.d(<4 x double> _1:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4u64 xvftint_lu_d(v4f64 _1) { return __lasx_xvftint_lu_d(_1); } ++// CHECK-LABEL: @xvftintrz_w_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.s(<8 x float> _1:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvftintrz_w_s(v8f32 _1) { return __lasx_xvftintrz_w_s(_1); } ++// CHECK-LABEL: @xvftintrz_l_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrz.l.d(<4 x double> _1:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvftintrz_l_d(v4f64 _1) { return __lasx_xvftintrz_l_d(_1); } ++// CHECK-LABEL: @xvftintrz_wu_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.wu.s(<8 x float> _1:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8u32 xvftintrz_wu_s(v8f32 _1) { return __lasx_xvftintrz_wu_s(_1); } ++// CHECK-LABEL: @xvftintrz_lu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrz.lu.d(<4 x double> _1:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4u64 xvftintrz_lu_d(v4f64 _1) { return __lasx_xvftintrz_lu_d(_1); } ++// CHECK-LABEL: @xvffint_s_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.w(<8 x i32> _1:%.*) ++// CHECK-NEXT: ret <8 x float> TMP0 ++// ++v8f32 xvffint_s_w(v8i32 _1) { return __lasx_xvffint_s_w(_1); } ++// CHECK-LABEL: @xvffint_d_l( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvffint.d.l(<4 x i64> _1:%.*) ++// CHECK-NEXT: ret <4 x double> TMP0 ++// ++v4f64 xvffint_d_l(v4i64 _1) { return __lasx_xvffint_d_l(_1); } ++// CHECK-LABEL: @xvffint_s_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 
x float> @llvm.loongarch.lasx.xvffint.s.wu(<8 x i32> _1:%.*) ++// CHECK-NEXT: ret <8 x float> TMP0 ++// ++v8f32 xvffint_s_wu(v8u32 _1) { return __lasx_xvffint_s_wu(_1); } ++// CHECK-LABEL: @xvffint_d_lu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvffint.d.lu(<4 x i64> _1:%.*) ++// CHECK-NEXT: ret <4 x double> TMP0 ++// ++v4f64 xvffint_d_lu(v4u64 _1) { return __lasx_xvffint_d_lu(_1); } ++// CHECK-LABEL: @xvreplve_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve.b(<32 x i8> _1:%.*, i32 _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvreplve_b(v32i8 _1, int _2) { return __lasx_xvreplve_b(_1, _2); } ++// CHECK-LABEL: @xvreplve_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvreplve.h(<16 x i16> _1:%.*, i32 _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvreplve_h(v16i16 _1, int _2) { return __lasx_xvreplve_h(_1, _2); } ++// CHECK-LABEL: @xvreplve_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvreplve.w(<8 x i32> _1:%.*, i32 _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvreplve_w(v8i32 _1, int _2) { return __lasx_xvreplve_w(_1, _2); } ++// CHECK-LABEL: @xvreplve_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvreplve.d(<4 x i64> _1:%.*, i32 _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvreplve_d(v4i64 _1, int _2) { return __lasx_xvreplve_d(_1, _2); } ++// CHECK-LABEL: @xvpermi_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvpermi.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvpermi_w(v8i32 _1, v8i32 _2) { return __lasx_xvpermi_w(_1, _2, 1); } ++// CHECK-LABEL: @xvandn_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvandn.v(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32u8 xvandn_v(v32u8 _1, v32u8 _2) { return __lasx_xvandn_v(_1, _2); } ++// CHECK-LABEL: @xvneg_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvneg.b(<32 x i8> _1:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvneg_b(v32i8 _1) { return __lasx_xvneg_b(_1); } ++// CHECK-LABEL: @xvneg_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvneg.h(<16 x i16> _1:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvneg_h(v16i16 _1) { return __lasx_xvneg_h(_1); } ++// CHECK-LABEL: @xvneg_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvneg.w(<8 x i32> _1:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvneg_w(v8i32 _1) { return __lasx_xvneg_w(_1); } ++// CHECK-LABEL: @xvneg_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvneg.d(<4 x i64> _1:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvneg_d(v4i64 _1) { return __lasx_xvneg_d(_1); } ++// CHECK-LABEL: @xvmuh_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmuh.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvmuh_b(v32i8 _1, v32i8 _2) { return __lasx_xvmuh_b(_1, _2); } ++// CHECK-LABEL: @xvmuh_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmuh.h(<16 x i16> 
_1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvmuh_h(v16i16 _1, v16i16 _2) { return __lasx_xvmuh_h(_1, _2); } ++// CHECK-LABEL: @xvmuh_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmuh.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvmuh_w(v8i32 _1, v8i32 _2) { return __lasx_xvmuh_w(_1, _2); } ++// CHECK-LABEL: @xvmuh_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmuh.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvmuh_d(v4i64 _1, v4i64 _2) { return __lasx_xvmuh_d(_1, _2); } ++// CHECK-LABEL: @xvmuh_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmuh.bu(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32u8 xvmuh_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmuh_bu(_1, _2); } ++// CHECK-LABEL: @xvmuh_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmuh.hu(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16u16 xvmuh_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmuh_hu(_1, _2); } ++// CHECK-LABEL: @xvmuh_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmuh.wu(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8u32 xvmuh_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmuh_wu(_1, _2); } ++// CHECK-LABEL: @xvmuh_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmuh.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4u64 xvmuh_du(v4u64 _1, v4u64 _2) { return __lasx_xvmuh_du(_1, _2); } ++// CHECK-LABEL: @xvsllwil_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b(<32 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvsllwil_h_b(v32i8 _1) { return __lasx_xvsllwil_h_b(_1, 1); } ++// CHECK-LABEL: @xvsllwil_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h(<16 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvsllwil_w_h(v16i16 _1) { return __lasx_xvsllwil_w_h(_1, 1); } ++// CHECK-LABEL: @xvsllwil_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsllwil.d.w(<8 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvsllwil_d_w(v8i32 _1) { return __lasx_xvsllwil_d_w(_1, 1); } ++// CHECK-LABEL: @xvsllwil_hu_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu(<32 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16u16 xvsllwil_hu_bu(v32u8 _1) { return __lasx_xvsllwil_hu_bu(_1, 1); } ++// CHECK-LABEL: @xvsllwil_wu_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu(<16 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8u32 xvsllwil_wu_hu(v16u16 _1) { return __lasx_xvsllwil_wu_hu(_1, 1); } ++// CHECK-LABEL: @xvsllwil_du_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu(<8 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4u64 xvsllwil_du_wu(v8u32 _1) { return __lasx_xvsllwil_du_wu(_1, 1); } ++// CHECK-LABEL: @xvsran_b_h( ++// 
CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsran.b.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvsran_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvsran_b_h(_1, _2); } ++// CHECK-LABEL: @xvsran_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsran.h.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvsran_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvsran_h_w(_1, _2); } ++// CHECK-LABEL: @xvsran_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsran.w.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvsran_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvsran_w_d(_1, _2); } ++// CHECK-LABEL: @xvssran_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssran.b.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvssran_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvssran_b_h(_1, _2); } ++// CHECK-LABEL: @xvssran_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssran.h.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvssran_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvssran_h_w(_1, _2); } ++// CHECK-LABEL: @xvssran_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssran.w.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvssran_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvssran_w_d(_1, _2); } ++// CHECK-LABEL: @xvssran_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssran.bu.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32u8 xvssran_bu_h(v16u16 _1, v16u16 _2) { return __lasx_xvssran_bu_h(_1, _2); } ++// CHECK-LABEL: @xvssran_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssran.hu.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16u16 xvssran_hu_w(v8u32 _1, v8u32 _2) { return __lasx_xvssran_hu_w(_1, _2); } ++// CHECK-LABEL: @xvssran_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssran.wu.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8u32 xvssran_wu_d(v4u64 _1, v4u64 _2) { return __lasx_xvssran_wu_d(_1, _2); } ++// CHECK-LABEL: @xvsrarn_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsrarn.b.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvsrarn_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrarn_b_h(_1, _2); } ++// CHECK-LABEL: @xvsrarn_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsrarn.h.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvsrarn_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrarn_h_w(_1, _2); } ++// CHECK-LABEL: @xvsrarn_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsrarn.w.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvsrarn_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrarn_w_d(_1, _2); } ++// CHECK-LABEL: @xvssrarn_b_h( ++// 
CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarn.b.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvssrarn_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvssrarn_b_h(_1, _2); } ++// CHECK-LABEL: @xvssrarn_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarn.h.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvssrarn_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvssrarn_h_w(_1, _2); } ++// CHECK-LABEL: @xvssrarn_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarn.w.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvssrarn_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvssrarn_w_d(_1, _2); } ++// CHECK-LABEL: @xvssrarn_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarn.bu.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32u8 xvssrarn_bu_h(v16u16 _1, v16u16 _2) { return __lasx_xvssrarn_bu_h(_1, _2); } ++// CHECK-LABEL: @xvssrarn_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarn.hu.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16u16 xvssrarn_hu_w(v8u32 _1, v8u32 _2) { return __lasx_xvssrarn_hu_w(_1, _2); } ++// CHECK-LABEL: @xvssrarn_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarn.wu.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8u32 xvssrarn_wu_d(v4u64 _1, v4u64 _2) { return __lasx_xvssrarn_wu_d(_1, _2); } ++// CHECK-LABEL: @xvsrln_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsrln.b.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvsrln_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrln_b_h(_1, _2); } ++// CHECK-LABEL: @xvsrln_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsrln.h.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvsrln_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrln_h_w(_1, _2); } ++// CHECK-LABEL: @xvsrln_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsrln.w.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvsrln_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrln_w_d(_1, _2); } ++// CHECK-LABEL: @xvssrln_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssrln.bu.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32u8 xvssrln_bu_h(v16u16 _1, v16u16 _2) { return __lasx_xvssrln_bu_h(_1, _2); } ++// CHECK-LABEL: @xvssrln_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssrln.hu.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16u16 xvssrln_hu_w(v8u32 _1, v8u32 _2) { return __lasx_xvssrln_hu_w(_1, _2); } ++// CHECK-LABEL: @xvssrln_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssrln.wu.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8u32 xvssrln_wu_d(v4u64 _1, v4u64 _2) { return __lasx_xvssrln_wu_d(_1, _2); } ++// 
CHECK-LABEL: @xvsrlrn_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlrn.b.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvsrlrn_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrlrn_b_h(_1, _2); } ++// CHECK-LABEL: @xvsrlrn_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlrn.h.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvsrlrn_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrlrn_h_w(_1, _2); } ++// CHECK-LABEL: @xvsrlrn_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlrn.w.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvsrlrn_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrlrn_w_d(_1, _2); } ++// CHECK-LABEL: @xvssrlrn_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.bu.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32u8 xvssrlrn_bu_h(v16u16 _1, v16u16 _2) { return __lasx_xvssrlrn_bu_h(_1, _2); } ++// CHECK-LABEL: @xvssrlrn_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.hu.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16u16 xvssrlrn_hu_w(v8u32 _1, v8u32 _2) { return __lasx_xvssrlrn_hu_w(_1, _2); } ++// CHECK-LABEL: @xvssrlrn_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.wu.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8u32 xvssrlrn_wu_d(v4u64 _1, v4u64 _2) { return __lasx_xvssrlrn_wu_d(_1, _2); } ++// CHECK-LABEL: @xvfrstpi_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvfrstpi_b(v32i8 _1, v32i8 _2) { return __lasx_xvfrstpi_b(_1, _2, 1); } ++// CHECK-LABEL: @xvfrstpi_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvfrstpi.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvfrstpi_h(v16i16 _1, v16i16 _2) { return __lasx_xvfrstpi_h(_1, _2, 1); } ++// CHECK-LABEL: @xvfrstp_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvfrstp.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*, <32 x i8> _3:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvfrstp_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __lasx_xvfrstp_b(_1, _2, _3); } ++// CHECK-LABEL: @xvfrstp_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvfrstp.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*, <16 x i16> _3:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvfrstp_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __lasx_xvfrstp_h(_1, _2, _3); } ++// CHECK-LABEL: @xvshuf4i_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvshuf4i_d(v4i64 _1, v4i64 _2) { return __lasx_xvshuf4i_d(_1, _2, 1); } ++// CHECK-LABEL: @xvbsrl_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvbsrl.v(<32 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 
xvbsrl_v(v32i8 _1) { return __lasx_xvbsrl_v(_1, 1); } ++// CHECK-LABEL: @xvbsll_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvbsll.v(<32 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvbsll_v(v32i8 _1) { return __lasx_xvbsll_v(_1, 1); } ++// CHECK-LABEL: @xvextrins_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvextrins.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvextrins_b(v32i8 _1, v32i8 _2) { return __lasx_xvextrins_b(_1, _2, 1); } ++// CHECK-LABEL: @xvextrins_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvextrins.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvextrins_h(v16i16 _1, v16i16 _2) { return __lasx_xvextrins_h(_1, _2, 1); } ++// CHECK-LABEL: @xvextrins_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvextrins.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvextrins_w(v8i32 _1, v8i32 _2) { return __lasx_xvextrins_w(_1, _2, 1); } ++// CHECK-LABEL: @xvextrins_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvextrins.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvextrins_d(v4i64 _1, v4i64 _2) { return __lasx_xvextrins_d(_1, _2, 1); } ++// CHECK-LABEL: @xvmskltz_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmskltz.b(<32 x i8> _1:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvmskltz_b(v32i8 _1) { return __lasx_xvmskltz_b(_1); } ++// CHECK-LABEL: @xvmskltz_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmskltz.h(<16 x i16> _1:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvmskltz_h(v16i16 _1) { return __lasx_xvmskltz_h(_1); } ++// CHECK-LABEL: @xvmskltz_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmskltz.w(<8 x i32> _1:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvmskltz_w(v8i32 _1) { return __lasx_xvmskltz_w(_1); } ++// CHECK-LABEL: @xvmskltz_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmskltz.d(<4 x i64> _1:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvmskltz_d(v4i64 _1) { return __lasx_xvmskltz_d(_1); } ++// CHECK-LABEL: @xvsigncov_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsigncov.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvsigncov_b(v32i8 _1, v32i8 _2) { return __lasx_xvsigncov_b(_1, _2); } ++// CHECK-LABEL: @xvsigncov_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsigncov.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvsigncov_h(v16i16 _1, v16i16 _2) { return __lasx_xvsigncov_h(_1, _2); } ++// CHECK-LABEL: @xvsigncov_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsigncov.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvsigncov_w(v8i32 _1, v8i32 _2) { return __lasx_xvsigncov_w(_1, _2); } ++// CHECK-LABEL: @xvsigncov_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> 
@llvm.loongarch.lasx.xvsigncov.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvsigncov_d(v4i64 _1, v4i64 _2) { return __lasx_xvsigncov_d(_1, _2); } ++// CHECK-LABEL: @xvfmadd_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfmadd.s(<8 x float> _1:%.*, <8 x float> _2:%.*, <8 x float> _3:%.*) ++// CHECK-NEXT: ret <8 x float> TMP0 ++// ++v8f32 xvfmadd_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __lasx_xvfmadd_s(_1, _2, _3); } ++// CHECK-LABEL: @xvfmadd_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfmadd.d(<4 x double> _1:%.*, <4 x double> _2:%.*, <4 x double> _3:%.*) ++// CHECK-NEXT: ret <4 x double> TMP0 ++// ++v4f64 xvfmadd_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __lasx_xvfmadd_d(_1, _2, _3); } ++// CHECK-LABEL: @xvfmsub_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfmsub.s(<8 x float> _1:%.*, <8 x float> _2:%.*, <8 x float> _3:%.*) ++// CHECK-NEXT: ret <8 x float> TMP0 ++// ++v8f32 xvfmsub_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __lasx_xvfmsub_s(_1, _2, _3); } ++// CHECK-LABEL: @xvfmsub_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfmsub.d(<4 x double> _1:%.*, <4 x double> _2:%.*, <4 x double> _3:%.*) ++// CHECK-NEXT: ret <4 x double> TMP0 ++// ++v4f64 xvfmsub_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __lasx_xvfmsub_d(_1, _2, _3); } ++// CHECK-LABEL: @xvfnmadd_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfnmadd.s(<8 x float> _1:%.*, <8 x float> _2:%.*, <8 x float> _3:%.*) ++// CHECK-NEXT: ret <8 x float> TMP0 ++// ++v8f32 xvfnmadd_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __lasx_xvfnmadd_s(_1, _2, _3); } ++// CHECK-LABEL: @xvfnmadd_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfnmadd.d(<4 x double> _1:%.*, <4 x double> _2:%.*, <4 x double> _3:%.*) ++// CHECK-NEXT: ret <4 x double> TMP0 ++// ++v4f64 xvfnmadd_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __lasx_xvfnmadd_d(_1, _2, _3); } ++// CHECK-LABEL: @xvfnmsub_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfnmsub.s(<8 x float> _1:%.*, <8 x float> _2:%.*, <8 x float> _3:%.*) ++// CHECK-NEXT: ret <8 x float> TMP0 ++// ++v8f32 xvfnmsub_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __lasx_xvfnmsub_s(_1, _2, _3); } ++// CHECK-LABEL: @xvfnmsub_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfnmsub.d(<4 x double> _1:%.*, <4 x double> _2:%.*, <4 x double> _3:%.*) ++// CHECK-NEXT: ret <4 x double> TMP0 ++// ++v4f64 xvfnmsub_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __lasx_xvfnmsub_d(_1, _2, _3); } ++// CHECK-LABEL: @xvftintrne_w_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.s(<8 x float> _1:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvftintrne_w_s(v8f32 _1) { return __lasx_xvftintrne_w_s(_1); } ++// CHECK-LABEL: @xvftintrne_l_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrne.l.d(<4 x double> _1:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvftintrne_l_d(v4f64 _1) { return __lasx_xvftintrne_l_d(_1); } ++// CHECK-LABEL: @xvftintrp_w_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.s(<8 x float> _1:%.*) 
++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvftintrp_w_s(v8f32 _1) { return __lasx_xvftintrp_w_s(_1); } ++// CHECK-LABEL: @xvftintrp_l_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrp.l.d(<4 x double> _1:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvftintrp_l_d(v4f64 _1) { return __lasx_xvftintrp_l_d(_1); } ++// CHECK-LABEL: @xvftintrm_w_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.s(<8 x float> _1:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvftintrm_w_s(v8f32 _1) { return __lasx_xvftintrm_w_s(_1); } ++// CHECK-LABEL: @xvftintrm_l_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrm.l.d(<4 x double> _1:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvftintrm_l_d(v4f64 _1) { return __lasx_xvftintrm_l_d(_1); } ++// CHECK-LABEL: @xvftint_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.w.d(<4 x double> _1:%.*, <4 x double> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvftint_w_d(v4f64 _1, v4f64 _2) { return __lasx_xvftint_w_d(_1, _2); } ++// CHECK-LABEL: @xvffint_s_l( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.l(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <8 x float> TMP0 ++// ++v8f32 xvffint_s_l(v4i64 _1, v4i64 _2) { return __lasx_xvffint_s_l(_1, _2); } ++// CHECK-LABEL: @xvftintrz_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.d(<4 x double> _1:%.*, <4 x double> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvftintrz_w_d(v4f64 _1, v4f64 _2) { return __lasx_xvftintrz_w_d(_1, _2); } ++// CHECK-LABEL: @xvftintrp_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.d(<4 x double> _1:%.*, <4 x double> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvftintrp_w_d(v4f64 _1, v4f64 _2) { return __lasx_xvftintrp_w_d(_1, _2); } ++// CHECK-LABEL: @xvftintrm_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.d(<4 x double> _1:%.*, <4 x double> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvftintrm_w_d(v4f64 _1, v4f64 _2) { return __lasx_xvftintrm_w_d(_1, _2); } ++// CHECK-LABEL: @xvftintrne_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.d(<4 x double> _1:%.*, <4 x double> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvftintrne_w_d(v4f64 _1, v4f64 _2) { return __lasx_xvftintrne_w_d(_1, _2); } ++// CHECK-LABEL: @xvftinth_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftinth.l.s(<8 x float> _1:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvftinth_l_s(v8f32 _1) { return __lasx_xvftinth_l_s(_1); } ++// CHECK-LABEL: @xvftintl_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftintl.l.s(<8 x float> _1:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvftintl_l_s(v8f32 _1) { return __lasx_xvftintl_l_s(_1); } ++// CHECK-LABEL: @xvffinth_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvffinth.d.w(<8 x i32> _1:%.*) ++// CHECK-NEXT: ret <4 x double> TMP0 ++// ++v4f64 xvffinth_d_w(v8i32 _1) { return 
__lasx_xvffinth_d_w(_1); } ++// CHECK-LABEL: @xvffintl_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvffintl.d.w(<8 x i32> _1:%.*) ++// CHECK-NEXT: ret <4 x double> TMP0 ++// ++v4f64 xvffintl_d_w(v8i32 _1) { return __lasx_xvffintl_d_w(_1); } ++// CHECK-LABEL: @xvftintrzh_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrzh.l.s(<8 x float> _1:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvftintrzh_l_s(v8f32 _1) { return __lasx_xvftintrzh_l_s(_1); } ++// CHECK-LABEL: @xvftintrzl_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrzl.l.s(<8 x float> _1:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvftintrzl_l_s(v8f32 _1) { return __lasx_xvftintrzl_l_s(_1); } ++// CHECK-LABEL: @xvftintrph_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrph.l.s(<8 x float> _1:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvftintrph_l_s(v8f32 _1) { return __lasx_xvftintrph_l_s(_1); } ++// CHECK-LABEL: @xvftintrpl_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrpl.l.s(<8 x float> _1:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvftintrpl_l_s(v8f32 _1) { return __lasx_xvftintrpl_l_s(_1); } ++// CHECK-LABEL: @xvftintrmh_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrmh.l.s(<8 x float> _1:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvftintrmh_l_s(v8f32 _1) { return __lasx_xvftintrmh_l_s(_1); } ++// CHECK-LABEL: @xvftintrml_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrml.l.s(<8 x float> _1:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvftintrml_l_s(v8f32 _1) { return __lasx_xvftintrml_l_s(_1); } ++// CHECK-LABEL: @xvftintrneh_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrneh.l.s(<8 x float> _1:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvftintrneh_l_s(v8f32 _1) { return __lasx_xvftintrneh_l_s(_1); } ++// CHECK-LABEL: @xvftintrnel_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrnel.l.s(<8 x float> _1:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvftintrnel_l_s(v8f32 _1) { return __lasx_xvftintrnel_l_s(_1); } ++// CHECK-LABEL: @xvfrintrne_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrne.s(<8 x float> _1:%.*) ++// CHECK-NEXT: TMP1:%.* = bitcast <8 x float> TMP0 to <8 x i32> ++// CHECK-NEXT: ret <8 x i32> TMP1 ++// ++v8i32 xvfrintrne_s(v8f32 _1) { return __lasx_xvfrintrne_s(_1); } ++// CHECK-LABEL: @xvfrintrne_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrne.d(<4 x double> _1:%.*) ++// CHECK-NEXT: TMP1:%.* = bitcast <4 x double> TMP0 to <4 x i64> ++// CHECK-NEXT: ret <4 x i64> TMP1 ++// ++v4i64 xvfrintrne_d(v4f64 _1) { return __lasx_xvfrintrne_d(_1); } ++// CHECK-LABEL: @xvfrintrz_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrz.s(<8 x float> _1:%.*) ++// CHECK-NEXT: TMP1:%.* = bitcast <8 x float> TMP0 to <8 x i32> ++// CHECK-NEXT: ret <8 x i32> TMP1 ++// ++v8i32 xvfrintrz_s(v8f32 _1) { return __lasx_xvfrintrz_s(_1); } ++// CHECK-LABEL: @xvfrintrz_d( ++// 
CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrz.d(<4 x double> _1:%.*) ++// CHECK-NEXT: TMP1:%.* = bitcast <4 x double> TMP0 to <4 x i64> ++// CHECK-NEXT: ret <4 x i64> TMP1 ++// ++v4i64 xvfrintrz_d(v4f64 _1) { return __lasx_xvfrintrz_d(_1); } ++// CHECK-LABEL: @xvfrintrp_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrp.s(<8 x float> _1:%.*) ++// CHECK-NEXT: TMP1:%.* = bitcast <8 x float> TMP0 to <8 x i32> ++// CHECK-NEXT: ret <8 x i32> TMP1 ++// ++v8i32 xvfrintrp_s(v8f32 _1) { return __lasx_xvfrintrp_s(_1); } ++// CHECK-LABEL: @xvfrintrp_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrp.d(<4 x double> _1:%.*) ++// CHECK-NEXT: TMP1:%.* = bitcast <4 x double> TMP0 to <4 x i64> ++// CHECK-NEXT: ret <4 x i64> TMP1 ++// ++v4i64 xvfrintrp_d(v4f64 _1) { return __lasx_xvfrintrp_d(_1); } ++// CHECK-LABEL: @xvfrintrm_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrm.s(<8 x float> _1:%.*) ++// CHECK-NEXT: TMP1:%.* = bitcast <8 x float> TMP0 to <8 x i32> ++// CHECK-NEXT: ret <8 x i32> TMP1 ++// ++v8i32 xvfrintrm_s(v8f32 _1) { return __lasx_xvfrintrm_s(_1); } ++// CHECK-LABEL: @xvfrintrm_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrm.d(<4 x double> _1:%.*) ++// CHECK-NEXT: TMP1:%.* = bitcast <4 x double> TMP0 to <4 x i64> ++// CHECK-NEXT: ret <4 x i64> TMP1 ++// ++v4i64 xvfrintrm_d(v4f64 _1) { return __lasx_xvfrintrm_d(_1); } ++// CHECK-LABEL: @xvld( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvld(ptr _1:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvld(void * _1) { return __lasx_xvld(_1, 1); } ++// CHECK-LABEL: @xvst( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvst(<32 x i8> _1:%.*, ptr _2:%.*, i32 1) ++// CHECK-NEXT: ret void ++// ++void xvst(v32i8 _1, void * _2) { return __lasx_xvst(_1, _2, 1); } ++// CHECK-LABEL: @xvstelm_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.b(<32 x i8> _1:%.*, ptr _2:%.*, i32 1, i32 1) ++// CHECK-NEXT: ret void ++// ++void xvstelm_b(v32i8 _1, void * _2) { return __lasx_xvstelm_b(_1, _2, 1, 1); } ++// CHECK-LABEL: @xvstelm_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.h(<16 x i16> _1:%.*, ptr _2:%.*, i32 2, i32 1) ++// CHECK-NEXT: ret void ++// ++void xvstelm_h(v16i16 _1, void * _2) { return __lasx_xvstelm_h(_1, _2, 2, 1); } ++// CHECK-LABEL: @xvstelm_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.w(<8 x i32> _1:%.*, ptr _2:%.*, i32 4, i32 1) ++// CHECK-NEXT: ret void ++// ++void xvstelm_w(v8i32 _1, void * _2) { return __lasx_xvstelm_w(_1, _2, 4, 1); } ++// CHECK-LABEL: @xvstelm_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.d(<4 x i64> _1:%.*, ptr _2:%.*, i32 8, i32 1) ++// CHECK-NEXT: ret void ++// ++void xvstelm_d(v4i64 _1, void * _2) { return __lasx_xvstelm_d(_1, _2, 8, 1); } ++// CHECK-LABEL: @xvinsve0_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvinsve0.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvinsve0_w(v8i32 _1, v8i32 _2) { return __lasx_xvinsve0_w(_1, _2, 1); } ++// CHECK-LABEL: @xvinsve0_d( ++// CHECK-NEXT: 
entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvinsve0.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvinsve0_d(v4i64 _1, v4i64 _2) { return __lasx_xvinsve0_d(_1, _2, 1); } ++// CHECK-LABEL: @xvpickve_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvpickve.w(<8 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvpickve_w(v8i32 _1) { return __lasx_xvpickve_w(_1, 1); } ++// CHECK-LABEL: @xvpickve_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvpickve.d(<4 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvpickve_d(v4i64 _1) { return __lasx_xvpickve_d(_1, 1); } ++// CHECK-LABEL: @xvssrlrn_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.b.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvssrlrn_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvssrlrn_b_h(_1, _2); } ++// CHECK-LABEL: @xvssrlrn_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.h.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvssrlrn_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvssrlrn_h_w(_1, _2); } ++// CHECK-LABEL: @xvssrlrn_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.w.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvssrlrn_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvssrlrn_w_d(_1, _2); } ++// CHECK-LABEL: @xvssrln_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssrln.b.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvssrln_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvssrln_b_h(_1, _2); } ++// CHECK-LABEL: @xvssrln_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssrln.h.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvssrln_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvssrln_h_w(_1, _2); } ++// CHECK-LABEL: @xvssrln_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssrln.w.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvssrln_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvssrln_w_d(_1, _2); } ++// CHECK-LABEL: @xvorn_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvorn.v(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvorn_v(v32i8 _1, v32i8 _2) { return __lasx_xvorn_v(_1, _2); } ++// CHECK-LABEL: @xvldi( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvldi(i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvldi() { return __lasx_xvldi(1); } ++// CHECK-LABEL: @xvldx( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvldx(ptr _1:%.*, i64 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvldx(void * _1) { return __lasx_xvldx(_1, 1); } ++// CHECK-LABEL: @xvstx( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstx(<32 x i8> _1:%.*, ptr _2:%.*, i64 1) ++// CHECK-NEXT: ret void ++// ++void xvstx(v32i8 _1, void * _2) { return __lasx_xvstx(_1, _2, 1); } 
++// CHECK-LABEL: @xvextl_qu_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvextl.qu.du(<4 x i64> _1:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4u64 xvextl_qu_du(v4u64 _1) { return __lasx_xvextl_qu_du(_1); } ++// CHECK-LABEL: @xvinsgr2vr_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w(<8 x i32> _1:%.*, i32 1, i32 1) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvinsgr2vr_w(v8i32 _1) { return __lasx_xvinsgr2vr_w(_1, 1, 1); } ++// CHECK-LABEL: @xvinsgr2vr_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64> _1:%.*, i64 1, i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvinsgr2vr_d(v4i64 _1) { return __lasx_xvinsgr2vr_d(_1, 1, 1); } ++// CHECK-LABEL: @xvreplve0_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve0.b(<32 x i8> _1:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvreplve0_b(v32i8 _1) { return __lasx_xvreplve0_b(_1); } ++// CHECK-LABEL: @xvreplve0_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvreplve0.h(<16 x i16> _1:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvreplve0_h(v16i16 _1) { return __lasx_xvreplve0_h(_1); } ++// CHECK-LABEL: @xvreplve0_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvreplve0.w(<8 x i32> _1:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvreplve0_w(v8i32 _1) { return __lasx_xvreplve0_w(_1); } ++// CHECK-LABEL: @xvreplve0_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvreplve0.d(<4 x i64> _1:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvreplve0_d(v4i64 _1) { return __lasx_xvreplve0_d(_1); } ++// CHECK-LABEL: @xvreplve0_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve0.q(<32 x i8> _1:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvreplve0_q(v32i8 _1) { return __lasx_xvreplve0_q(_1); } ++// CHECK-LABEL: @vext2xv_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.vext2xv.h.b(<32 x i8> _1:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 vext2xv_h_b(v32i8 _1) { return __lasx_vext2xv_h_b(_1); } ++// CHECK-LABEL: @vext2xv_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.h(<16 x i16> _1:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 vext2xv_w_h(v16i16 _1) { return __lasx_vext2xv_w_h(_1); } ++// CHECK-LABEL: @vext2xv_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.w(<8 x i32> _1:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 vext2xv_d_w(v8i32 _1) { return __lasx_vext2xv_d_w(_1); } ++// CHECK-LABEL: @vext2xv_w_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.b(<32 x i8> _1:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 vext2xv_w_b(v32i8 _1) { return __lasx_vext2xv_w_b(_1); } ++// CHECK-LABEL: @vext2xv_d_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.h(<16 x i16> _1:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 vext2xv_d_h(v16i16 _1) { return __lasx_vext2xv_d_h(_1); } ++// CHECK-LABEL: @vext2xv_d_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 
x i64> @llvm.loongarch.lasx.vext2xv.d.b(<32 x i8> _1:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 vext2xv_d_b(v32i8 _1) { return __lasx_vext2xv_d_b(_1); } ++// CHECK-LABEL: @vext2xv_hu_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.vext2xv.hu.bu(<32 x i8> _1:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 vext2xv_hu_bu(v32i8 _1) { return __lasx_vext2xv_hu_bu(_1); } ++// CHECK-LABEL: @vext2xv_wu_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.hu(<16 x i16> _1:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 vext2xv_wu_hu(v16i16 _1) { return __lasx_vext2xv_wu_hu(_1); } ++// CHECK-LABEL: @vext2xv_du_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.wu(<8 x i32> _1:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 vext2xv_du_wu(v8i32 _1) { return __lasx_vext2xv_du_wu(_1); } ++// CHECK-LABEL: @vext2xv_wu_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.bu(<32 x i8> _1:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 vext2xv_wu_bu(v32i8 _1) { return __lasx_vext2xv_wu_bu(_1); } ++// CHECK-LABEL: @vext2xv_du_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.hu(<16 x i16> _1:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 vext2xv_du_hu(v16i16 _1) { return __lasx_vext2xv_du_hu(_1); } ++// CHECK-LABEL: @vext2xv_du_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.bu(<32 x i8> _1:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 vext2xv_du_bu(v32i8 _1) { return __lasx_vext2xv_du_bu(_1); } ++// CHECK-LABEL: @xvpermi_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvpermi.q(<32 x i8> _1:%.*, <32 x i8> _2:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvpermi_q(v32i8 _1, v32i8 _2) { return __lasx_xvpermi_q(_1, _2, 1); } ++// CHECK-LABEL: @xvpermi_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvpermi.d(<4 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvpermi_d(v4i64 _1) { return __lasx_xvpermi_d(_1, 1); } ++// CHECK-LABEL: @xvperm_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvperm.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvperm_w(v8i32 _1, v8i32 _2) { return __lasx_xvperm_w(_1, _2); } ++// CHECK-LABEL: @xvldrepl_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(ptr _1:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvldrepl_b(void * _1) { return __lasx_xvldrepl_b(_1, 1); } ++// CHECK-LABEL: @xvldrepl_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvldrepl.h(ptr _1:%.*, i32 2) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvldrepl_h(void * _1) { return __lasx_xvldrepl_h(_1, 2); } ++// CHECK-LABEL: @xvldrepl_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvldrepl.w(ptr _1:%.*, i32 4) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvldrepl_w(void * _1) { return __lasx_xvldrepl_w(_1, 4); } ++// CHECK-LABEL: @xvldrepl_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvldrepl.d(ptr 
_1:%.*, i32 8) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvldrepl_d(void * _1) { return __lasx_xvldrepl_d(_1, 8); } ++// CHECK-LABEL: @xvpickve2gr_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lasx.xvpickve2gr.w(<8 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret i32 TMP0 ++// ++int xvpickve2gr_w(v8i32 _1) { return __lasx_xvpickve2gr_w(_1, 1); } ++// CHECK-LABEL: @xvpickve2gr_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lasx.xvpickve2gr.wu(<8 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret i32 TMP0 ++// ++unsigned int xvpickve2gr_wu(v8i32 _1) { return __lasx_xvpickve2gr_wu(_1, 1); } ++// CHECK-LABEL: @xvpickve2gr_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret i64 TMP0 ++// ++long xvpickve2gr_d(v4i64 _1) { return __lasx_xvpickve2gr_d(_1, 1); } ++// CHECK-LABEL: @xvpickve2gr_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret i64 TMP0 ++// ++unsigned long int xvpickve2gr_du(v4i64 _1) { return __lasx_xvpickve2gr_du(_1, 1); } ++// CHECK-LABEL: @xvaddwev_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvaddwev_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvaddwev_q_d(_1, _2); } ++// CHECK-LABEL: @xvaddwev_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvaddwev_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvaddwev_d_w(_1, _2); } ++// CHECK-LABEL: @xvaddwev_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvaddwev_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvaddwev_w_h(_1, _2); } ++// CHECK-LABEL: @xvaddwev_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvaddwev_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvaddwev_h_b(_1, _2); } ++// CHECK-LABEL: @xvaddwev_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvaddwev_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvaddwev_q_du(_1, _2); } ++// CHECK-LABEL: @xvaddwev_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvaddwev_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvaddwev_d_wu(_1, _2); } ++// CHECK-LABEL: @xvaddwev_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvaddwev_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvaddwev_w_hu(_1, _2); } ++// CHECK-LABEL: @xvaddwev_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 
xvaddwev_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvaddwev_h_bu(_1, _2); } ++// CHECK-LABEL: @xvsubwev_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvsubwev_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvsubwev_q_d(_1, _2); } ++// CHECK-LABEL: @xvsubwev_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvsubwev_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvsubwev_d_w(_1, _2); } ++// CHECK-LABEL: @xvsubwev_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvsubwev_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvsubwev_w_h(_1, _2); } ++// CHECK-LABEL: @xvsubwev_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvsubwev_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvsubwev_h_b(_1, _2); } ++// CHECK-LABEL: @xvsubwev_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvsubwev_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvsubwev_q_du(_1, _2); } ++// CHECK-LABEL: @xvsubwev_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.wu(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvsubwev_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvsubwev_d_wu(_1, _2); } ++// CHECK-LABEL: @xvsubwev_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.hu(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvsubwev_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvsubwev_w_hu(_1, _2); } ++// CHECK-LABEL: @xvsubwev_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.bu(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvsubwev_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvsubwev_h_bu(_1, _2); } ++// CHECK-LABEL: @xvmulwev_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvmulwev_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvmulwev_q_d(_1, _2); } ++// CHECK-LABEL: @xvmulwev_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvmulwev_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvmulwev_d_w(_1, _2); } ++// CHECK-LABEL: @xvmulwev_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvmulwev_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvmulwev_w_h(_1, _2); } ++// CHECK-LABEL: @xvmulwev_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) 
++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvmulwev_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvmulwev_h_b(_1, _2); } ++// CHECK-LABEL: @xvmulwev_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvmulwev_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvmulwev_q_du(_1, _2); } ++// CHECK-LABEL: @xvmulwev_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvmulwev_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmulwev_d_wu(_1, _2); } ++// CHECK-LABEL: @xvmulwev_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvmulwev_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmulwev_w_hu(_1, _2); } ++// CHECK-LABEL: @xvmulwev_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvmulwev_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmulwev_h_bu(_1, _2); } ++// CHECK-LABEL: @xvaddwod_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvaddwod_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvaddwod_q_d(_1, _2); } ++// CHECK-LABEL: @xvaddwod_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvaddwod_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvaddwod_d_w(_1, _2); } ++// CHECK-LABEL: @xvaddwod_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvaddwod_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvaddwod_w_h(_1, _2); } ++// CHECK-LABEL: @xvaddwod_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvaddwod_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvaddwod_h_b(_1, _2); } ++// CHECK-LABEL: @xvaddwod_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvaddwod_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvaddwod_q_du(_1, _2); } ++// CHECK-LABEL: @xvaddwod_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvaddwod_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvaddwod_d_wu(_1, _2); } ++// CHECK-LABEL: @xvaddwod_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvaddwod_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvaddwod_w_hu(_1, _2); } ++// CHECK-LABEL: @xvaddwod_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> 
@llvm.loongarch.lasx.xvaddwod.h.bu(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvaddwod_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvaddwod_h_bu(_1, _2); } ++// CHECK-LABEL: @xvsubwod_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvsubwod_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvsubwod_q_d(_1, _2); } ++// CHECK-LABEL: @xvsubwod_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvsubwod_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvsubwod_d_w(_1, _2); } ++// CHECK-LABEL: @xvsubwod_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvsubwod_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvsubwod_w_h(_1, _2); } ++// CHECK-LABEL: @xvsubwod_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvsubwod_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvsubwod_h_b(_1, _2); } ++// CHECK-LABEL: @xvsubwod_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvsubwod_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvsubwod_q_du(_1, _2); } ++// CHECK-LABEL: @xvsubwod_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.wu(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvsubwod_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvsubwod_d_wu(_1, _2); } ++// CHECK-LABEL: @xvsubwod_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.hu(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvsubwod_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvsubwod_w_hu(_1, _2); } ++// CHECK-LABEL: @xvsubwod_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.bu(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvsubwod_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvsubwod_h_bu(_1, _2); } ++// CHECK-LABEL: @xvmulwod_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvmulwod_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvmulwod_q_d(_1, _2); } ++// CHECK-LABEL: @xvmulwod_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvmulwod_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvmulwod_d_w(_1, _2); } ++// CHECK-LABEL: @xvmulwod_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvmulwod_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvmulwod_w_h(_1, _2); } ++// CHECK-LABEL: @xvmulwod_h_b( ++// CHECK-NEXT: entry: 
++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvmulwod_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvmulwod_h_b(_1, _2); } ++// CHECK-LABEL: @xvmulwod_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvmulwod_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvmulwod_q_du(_1, _2); } ++// CHECK-LABEL: @xvmulwod_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvmulwod_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmulwod_d_wu(_1, _2); } ++// CHECK-LABEL: @xvmulwod_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvmulwod_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmulwod_w_hu(_1, _2); } ++// CHECK-LABEL: @xvmulwod_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvmulwod_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmulwod_h_bu(_1, _2); } ++// CHECK-LABEL: @xvaddwev_d_wu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvaddwev_d_wu_w(v8u32 _1, v8i32 _2) { return __lasx_xvaddwev_d_wu_w(_1, _2); } ++// CHECK-LABEL: @xvaddwev_w_hu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvaddwev_w_hu_h(v16u16 _1, v16i16 _2) { return __lasx_xvaddwev_w_hu_h(_1, _2); } ++// CHECK-LABEL: @xvaddwev_h_bu_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvaddwev_h_bu_b(v32u8 _1, v32i8 _2) { return __lasx_xvaddwev_h_bu_b(_1, _2); } ++// CHECK-LABEL: @xvmulwev_d_wu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvmulwev_d_wu_w(v8u32 _1, v8i32 _2) { return __lasx_xvmulwev_d_wu_w(_1, _2); } ++// CHECK-LABEL: @xvmulwev_w_hu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvmulwev_w_hu_h(v16u16 _1, v16i16 _2) { return __lasx_xvmulwev_w_hu_h(_1, _2); } ++// CHECK-LABEL: @xvmulwev_h_bu_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvmulwev_h_bu_b(v32u8 _1, v32i8 _2) { return __lasx_xvmulwev_h_bu_b(_1, _2); } ++// CHECK-LABEL: @xvaddwod_d_wu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 
xvaddwod_d_wu_w(v8u32 _1, v8i32 _2) { return __lasx_xvaddwod_d_wu_w(_1, _2); } ++// CHECK-LABEL: @xvaddwod_w_hu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvaddwod_w_hu_h(v16u16 _1, v16i16 _2) { return __lasx_xvaddwod_w_hu_h(_1, _2); } ++// CHECK-LABEL: @xvaddwod_h_bu_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvaddwod_h_bu_b(v32u8 _1, v32i8 _2) { return __lasx_xvaddwod_h_bu_b(_1, _2); } ++// CHECK-LABEL: @xvmulwod_d_wu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvmulwod_d_wu_w(v8u32 _1, v8i32 _2) { return __lasx_xvmulwod_d_wu_w(_1, _2); } ++// CHECK-LABEL: @xvmulwod_w_hu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvmulwod_w_hu_h(v16u16 _1, v16i16 _2) { return __lasx_xvmulwod_w_hu_h(_1, _2); } ++// CHECK-LABEL: @xvmulwod_h_bu_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvmulwod_h_bu_b(v32u8 _1, v32i8 _2) { return __lasx_xvmulwod_h_bu_b(_1, _2); } ++// CHECK-LABEL: @xvhaddw_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.q.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvhaddw_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvhaddw_q_d(_1, _2); } ++// CHECK-LABEL: @xvhaddw_qu_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.qu.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4u64 xvhaddw_qu_du(v4u64 _1, v4u64 _2) { return __lasx_xvhaddw_qu_du(_1, _2); } ++// CHECK-LABEL: @xvhsubw_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.q.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvhsubw_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvhsubw_q_d(_1, _2); } ++// CHECK-LABEL: @xvhsubw_qu_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.qu.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4u64 xvhsubw_qu_du(v4u64 _1, v4u64 _2) { return __lasx_xvhsubw_qu_du(_1, _2); } ++// CHECK-LABEL: @xvmaddwev_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*, <4 x i64> _3:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvmaddwev_q_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __lasx_xvmaddwev_q_d(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwev_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.w(<4 x i64> _1:%.*, <8 x i32> _2:%.*, <8 x i32> _3:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvmaddwev_d_w(v4i64 _1, v8i32 _2, v8i32 _3) { return __lasx_xvmaddwev_d_w(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwev_w_h( ++// CHECK-NEXT: entry: ++// 
CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.h(<8 x i32> _1:%.*, <16 x i16> _2:%.*, <16 x i16> _3:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvmaddwev_w_h(v8i32 _1, v16i16 _2, v16i16 _3) { return __lasx_xvmaddwev_w_h(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwev_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.b(<16 x i16> _1:%.*, <32 x i8> _2:%.*, <32 x i8> _3:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvmaddwev_h_b(v16i16 _1, v32i8 _2, v32i8 _3) { return __lasx_xvmaddwev_h_b(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwev_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*, <4 x i64> _3:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4u64 xvmaddwev_q_du(v4u64 _1, v4u64 _2, v4u64 _3) { return __lasx_xvmaddwev_q_du(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwev_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu(<4 x i64> _1:%.*, <8 x i32> _2:%.*, <8 x i32> _3:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4u64 xvmaddwev_d_wu(v4u64 _1, v8u32 _2, v8u32 _3) { return __lasx_xvmaddwev_d_wu(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwev_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu(<8 x i32> _1:%.*, <16 x i16> _2:%.*, <16 x i16> _3:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8u32 xvmaddwev_w_hu(v8u32 _1, v16u16 _2, v16u16 _3) { return __lasx_xvmaddwev_w_hu(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwev_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu(<16 x i16> _1:%.*, <32 x i8> _2:%.*, <32 x i8> _3:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16u16 xvmaddwev_h_bu(v16u16 _1, v32u8 _2, v32u8 _3) { return __lasx_xvmaddwev_h_bu(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwod_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*, <4 x i64> _3:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvmaddwod_q_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __lasx_xvmaddwod_q_d(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwod_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.w(<4 x i64> _1:%.*, <8 x i32> _2:%.*, <8 x i32> _3:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvmaddwod_d_w(v4i64 _1, v8i32 _2, v8i32 _3) { return __lasx_xvmaddwod_d_w(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwod_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.h(<8 x i32> _1:%.*, <16 x i16> _2:%.*, <16 x i16> _3:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvmaddwod_w_h(v8i32 _1, v16i16 _2, v16i16 _3) { return __lasx_xvmaddwod_w_h(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwod_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.b(<16 x i16> _1:%.*, <32 x i8> _2:%.*, <32 x i8> _3:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvmaddwod_h_b(v16i16 _1, v32i8 _2, v32i8 _3) { return __lasx_xvmaddwod_h_b(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwod_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*, <4 x i64> _3:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// 
++v4u64 xvmaddwod_q_du(v4u64 _1, v4u64 _2, v4u64 _3) { return __lasx_xvmaddwod_q_du(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwod_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu(<4 x i64> _1:%.*, <8 x i32> _2:%.*, <8 x i32> _3:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4u64 xvmaddwod_d_wu(v4u64 _1, v8u32 _2, v8u32 _3) { return __lasx_xvmaddwod_d_wu(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwod_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu(<8 x i32> _1:%.*, <16 x i16> _2:%.*, <16 x i16> _3:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8u32 xvmaddwod_w_hu(v8u32 _1, v16u16 _2, v16u16 _3) { return __lasx_xvmaddwod_w_hu(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwod_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu(<16 x i16> _1:%.*, <32 x i8> _2:%.*, <32 x i8> _3:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16u16 xvmaddwod_h_bu(v16u16 _1, v32u8 _2, v32u8 _3) { return __lasx_xvmaddwod_h_bu(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwev_q_du_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*, <4 x i64> _3:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvmaddwev_q_du_d(v4i64 _1, v4u64 _2, v4i64 _3) { return __lasx_xvmaddwev_q_du_d(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwev_d_wu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu.w(<4 x i64> _1:%.*, <8 x i32> _2:%.*, <8 x i32> _3:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvmaddwev_d_wu_w(v4i64 _1, v8u32 _2, v8i32 _3) { return __lasx_xvmaddwev_d_wu_w(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwev_w_hu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu.h(<8 x i32> _1:%.*, <16 x i16> _2:%.*, <16 x i16> _3:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvmaddwev_w_hu_h(v8i32 _1, v16u16 _2, v16i16 _3) { return __lasx_xvmaddwev_w_hu_h(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwev_h_bu_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu.b(<16 x i16> _1:%.*, <32 x i8> _2:%.*, <32 x i8> _3:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvmaddwev_h_bu_b(v16i16 _1, v32u8 _2, v32i8 _3) { return __lasx_xvmaddwev_h_bu_b(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwod_q_du_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*, <4 x i64> _3:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvmaddwod_q_du_d(v4i64 _1, v4u64 _2, v4i64 _3) { return __lasx_xvmaddwod_q_du_d(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwod_d_wu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu.w(<4 x i64> _1:%.*, <8 x i32> _2:%.*, <8 x i32> _3:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvmaddwod_d_wu_w(v4i64 _1, v8u32 _2, v8i32 _3) { return __lasx_xvmaddwod_d_wu_w(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwod_w_hu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu.h(<8 x i32> _1:%.*, <16 x i16> _2:%.*, <16 x i16> _3:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvmaddwod_w_hu_h(v8i32 _1, v16u16 _2, v16i16 _3) { return __lasx_xvmaddwod_w_hu_h(_1, _2, _3); } ++// 
CHECK-LABEL: @xvmaddwod_h_bu_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu.b(<16 x i16> _1:%.*, <32 x i8> _2:%.*, <32 x i8> _3:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvmaddwod_h_bu_b(v16i16 _1, v32u8 _2, v32i8 _3) { return __lasx_xvmaddwod_h_bu_b(_1, _2, _3); } ++// CHECK-LABEL: @xvrotr_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvrotr.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvrotr_b(v32i8 _1, v32i8 _2) { return __lasx_xvrotr_b(_1, _2); } ++// CHECK-LABEL: @xvrotr_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvrotr.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvrotr_h(v16i16 _1, v16i16 _2) { return __lasx_xvrotr_h(_1, _2); } ++// CHECK-LABEL: @xvrotr_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvrotr.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvrotr_w(v8i32 _1, v8i32 _2) { return __lasx_xvrotr_w(_1, _2); } ++// CHECK-LABEL: @xvrotr_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvrotr.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvrotr_d(v4i64 _1, v4i64 _2) { return __lasx_xvrotr_d(_1, _2); } ++// CHECK-LABEL: @xvadd_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvadd.q(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvadd_q(v4i64 _1, v4i64 _2) { return __lasx_xvadd_q(_1, _2); } ++// CHECK-LABEL: @xvsub_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsub.q(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvsub_q(v4i64 _1, v4i64 _2) { return __lasx_xvsub_q(_1, _2); } ++// CHECK-LABEL: @xvaddwev_q_du_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvaddwev_q_du_d(v4u64 _1, v4i64 _2) { return __lasx_xvaddwev_q_du_d(_1, _2); } ++// CHECK-LABEL: @xvaddwod_q_du_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvaddwod_q_du_d(v4u64 _1, v4i64 _2) { return __lasx_xvaddwod_q_du_d(_1, _2); } ++// CHECK-LABEL: @xvmulwev_q_du_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvmulwev_q_du_d(v4u64 _1, v4i64 _2) { return __lasx_xvmulwev_q_du_d(_1, _2); } ++// CHECK-LABEL: @xvmulwod_q_du_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvmulwod_q_du_d(v4u64 _1, v4i64 _2) { return __lasx_xvmulwod_q_du_d(_1, _2); } ++// CHECK-LABEL: @xvmskgez_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmskgez.b(<32 x i8> _1:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvmskgez_b(v32i8 _1) { return __lasx_xvmskgez_b(_1); } ++// CHECK-LABEL: @xvmsknz_b( 
++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmsknz.b(<32 x i8> _1:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvmsknz_b(v32i8 _1) { return __lasx_xvmsknz_b(_1); } ++// CHECK-LABEL: @xvexth_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvexth.h.b(<32 x i8> _1:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvexth_h_b(v32i8 _1) { return __lasx_xvexth_h_b(_1); } ++// CHECK-LABEL: @xvexth_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvexth.w.h(<16 x i16> _1:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvexth_w_h(v16i16 _1) { return __lasx_xvexth_w_h(_1); } ++// CHECK-LABEL: @xvexth_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.d.w(<8 x i32> _1:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvexth_d_w(v8i32 _1) { return __lasx_xvexth_d_w(_1); } ++// CHECK-LABEL: @xvexth_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.q.d(<4 x i64> _1:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvexth_q_d(v4i64 _1) { return __lasx_xvexth_q_d(_1); } ++// CHECK-LABEL: @xvexth_hu_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvexth.hu.bu(<32 x i8> _1:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16u16 xvexth_hu_bu(v32u8 _1) { return __lasx_xvexth_hu_bu(_1); } ++// CHECK-LABEL: @xvexth_wu_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvexth.wu.hu(<16 x i16> _1:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8u32 xvexth_wu_hu(v16u16 _1) { return __lasx_xvexth_wu_hu(_1); } ++// CHECK-LABEL: @xvexth_du_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.du.wu(<8 x i32> _1:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4u64 xvexth_du_wu(v8u32 _1) { return __lasx_xvexth_du_wu(_1); } ++// CHECK-LABEL: @xvexth_qu_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.qu.du(<4 x i64> _1:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4u64 xvexth_qu_du(v4u64 _1) { return __lasx_xvexth_qu_du(_1); } ++// CHECK-LABEL: @xvrotri_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvrotri.b(<32 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvrotri_b(v32i8 _1) { return __lasx_xvrotri_b(_1, 1); } ++// CHECK-LABEL: @xvrotri_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvrotri.h(<16 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvrotri_h(v16i16 _1) { return __lasx_xvrotri_h(_1, 1); } ++// CHECK-LABEL: @xvrotri_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvrotri.w(<8 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvrotri_w(v8i32 _1) { return __lasx_xvrotri_w(_1, 1); } ++// CHECK-LABEL: @xvrotri_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvrotri.d(<4 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvrotri_d(v4i64 _1) { return __lasx_xvrotri_d(_1, 1); } ++// CHECK-LABEL: @xvextl_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvextl.q.d(<4 x i64> _1:%.*) ++// CHECK-NEXT: ret <4 
x i64> TMP0 ++// ++v4i64 xvextl_q_d(v4i64 _1) { return __lasx_xvextl_q_d(_1); } ++// CHECK-LABEL: @xvsrlni_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h(<32 x i8> _1:%.*, <32 x i8> _2:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvsrlni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvsrlni_b_h(_1, _2, 1); } ++// CHECK-LABEL: @xvsrlni_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w(<16 x i16> _1:%.*, <16 x i16> _2:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvsrlni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvsrlni_h_w(_1, _2, 1); } ++// CHECK-LABEL: @xvsrlni_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d(<8 x i32> _1:%.*, <8 x i32> _2:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvsrlni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvsrlni_w_d(_1, _2, 1); } ++// CHECK-LABEL: @xvsrlni_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q(<4 x i64> _1:%.*, <4 x i64> _2:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvsrlni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvsrlni_d_q(_1, _2, 1); } ++// CHECK-LABEL: @xvsrlrni_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h(<32 x i8> _1:%.*, <32 x i8> _2:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvsrlrni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvsrlrni_b_h(_1, _2, 1); } ++// CHECK-LABEL: @xvsrlrni_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w(<16 x i16> _1:%.*, <16 x i16> _2:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvsrlrni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvsrlrni_h_w(_1, _2, 1); } ++// CHECK-LABEL: @xvsrlrni_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d(<8 x i32> _1:%.*, <8 x i32> _2:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvsrlrni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvsrlrni_w_d(_1, _2, 1); } ++// CHECK-LABEL: @xvsrlrni_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q(<4 x i64> _1:%.*, <4 x i64> _2:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvsrlrni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvsrlrni_d_q(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlni_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8> _1:%.*, <32 x i8> _2:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvssrlni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvssrlni_b_h(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlni_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w(<16 x i16> _1:%.*, <16 x i16> _2:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvssrlni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvssrlni_h_w(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlni_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d(<8 x i32> _1:%.*, <8 x i32> _2:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvssrlni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvssrlni_w_d(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlni_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail 
call <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q(<4 x i64> _1:%.*, <4 x i64> _2:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvssrlni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvssrlni_d_q(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlni_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h(<32 x i8> _1:%.*, <32 x i8> _2:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32u8 xvssrlni_bu_h(v32u8 _1, v32i8 _2) { return __lasx_xvssrlni_bu_h(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlni_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w(<16 x i16> _1:%.*, <16 x i16> _2:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16u16 xvssrlni_hu_w(v16u16 _1, v16i16 _2) { return __lasx_xvssrlni_hu_w(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlni_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d(<8 x i32> _1:%.*, <8 x i32> _2:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8u32 xvssrlni_wu_d(v8u32 _1, v8i32 _2) { return __lasx_xvssrlni_wu_d(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlni_du_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q(<4 x i64> _1:%.*, <4 x i64> _2:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4u64 xvssrlni_du_q(v4u64 _1, v4i64 _2) { return __lasx_xvssrlni_du_q(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlrni_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h(<32 x i8> _1:%.*, <32 x i8> _2:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvssrlrni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvssrlrni_b_h(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlrni_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w(<16 x i16> _1:%.*, <16 x i16> _2:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvssrlrni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvssrlrni_h_w(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlrni_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d(<8 x i32> _1:%.*, <8 x i32> _2:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvssrlrni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvssrlrni_w_d(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlrni_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.d.q(<4 x i64> _1:%.*, <4 x i64> _2:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvssrlrni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvssrlrni_d_q(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlrni_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h(<32 x i8> _1:%.*, <32 x i8> _2:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32u8 xvssrlrni_bu_h(v32u8 _1, v32i8 _2) { return __lasx_xvssrlrni_bu_h(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlrni_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w(<16 x i16> _1:%.*, <16 x i16> _2:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16u16 xvssrlrni_hu_w(v16u16 _1, v16i16 _2) { return __lasx_xvssrlrni_hu_w(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlrni_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d(<8 x i32> _1:%.*, <8 x i32> _2:%.*, i32 1) ++// 
CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8u32 xvssrlrni_wu_d(v8u32 _1, v8i32 _2) { return __lasx_xvssrlrni_wu_d(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlrni_du_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q(<4 x i64> _1:%.*, <4 x i64> _2:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4u64 xvssrlrni_du_q(v4u64 _1, v4i64 _2) { return __lasx_xvssrlrni_du_q(_1, _2, 1); } ++// CHECK-LABEL: @xvsrani_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h(<32 x i8> _1:%.*, <32 x i8> _2:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvsrani_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvsrani_b_h(_1, _2, 1); } ++// CHECK-LABEL: @xvsrani_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w(<16 x i16> _1:%.*, <16 x i16> _2:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvsrani_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvsrani_h_w(_1, _2, 1); } ++// CHECK-LABEL: @xvsrani_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d(<8 x i32> _1:%.*, <8 x i32> _2:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvsrani_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvsrani_w_d(_1, _2, 1); } ++// CHECK-LABEL: @xvsrani_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q(<4 x i64> _1:%.*, <4 x i64> _2:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvsrani_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvsrani_d_q(_1, _2, 1); } ++// CHECK-LABEL: @xvsrarni_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h(<32 x i8> _1:%.*, <32 x i8> _2:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvsrarni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvsrarni_b_h(_1, _2, 1); } ++// CHECK-LABEL: @xvsrarni_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w(<16 x i16> _1:%.*, <16 x i16> _2:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvsrarni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvsrarni_h_w(_1, _2, 1); } ++// CHECK-LABEL: @xvsrarni_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d(<8 x i32> _1:%.*, <8 x i32> _2:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvsrarni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvsrarni_w_d(_1, _2, 1); } ++// CHECK-LABEL: @xvsrarni_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q(<4 x i64> _1:%.*, <4 x i64> _2:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvsrarni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvsrarni_d_q(_1, _2, 1); } ++// CHECK-LABEL: @xvssrani_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h(<32 x i8> _1:%.*, <32 x i8> _2:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvssrani_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvssrani_b_h(_1, _2, 1); } ++// CHECK-LABEL: @xvssrani_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssrani.h.w(<16 x i16> _1:%.*, <16 x i16> _2:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvssrani_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvssrani_h_w(_1, _2, 1); } ++// CHECK-LABEL: @xvssrani_w_d( ++// 
CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d(<8 x i32> _1:%.*, <8 x i32> _2:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvssrani_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvssrani_w_d(_1, _2, 1); } ++// CHECK-LABEL: @xvssrani_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q(<4 x i64> _1:%.*, <4 x i64> _2:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvssrani_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvssrani_d_q(_1, _2, 1); } ++// CHECK-LABEL: @xvssrani_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h(<32 x i8> _1:%.*, <32 x i8> _2:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32u8 xvssrani_bu_h(v32u8 _1, v32i8 _2) { return __lasx_xvssrani_bu_h(_1, _2, 1); } ++// CHECK-LABEL: @xvssrani_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w(<16 x i16> _1:%.*, <16 x i16> _2:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16u16 xvssrani_hu_w(v16u16 _1, v16i16 _2) { return __lasx_xvssrani_hu_w(_1, _2, 1); } ++// CHECK-LABEL: @xvssrani_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d(<8 x i32> _1:%.*, <8 x i32> _2:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8u32 xvssrani_wu_d(v8u32 _1, v8i32 _2) { return __lasx_xvssrani_wu_d(_1, _2, 1); } ++// CHECK-LABEL: @xvssrani_du_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvssrani.du.q(<4 x i64> _1:%.*, <4 x i64> _2:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4u64 xvssrani_du_q(v4u64 _1, v4i64 _2) { return __lasx_xvssrani_du_q(_1, _2, 1); } ++// CHECK-LABEL: @xvssrarni_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h(<32 x i8> _1:%.*, <32 x i8> _2:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvssrarni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvssrarni_b_h(_1, _2, 1); } ++// CHECK-LABEL: @xvssrarni_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w(<16 x i16> _1:%.*, <16 x i16> _2:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvssrarni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvssrarni_h_w(_1, _2, 1); } ++// CHECK-LABEL: @xvssrarni_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarni.w.d(<8 x i32> _1:%.*, <8 x i32> _2:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvssrarni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvssrarni_w_d(_1, _2, 1); } ++// CHECK-LABEL: @xvssrarni_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q(<4 x i64> _1:%.*, <4 x i64> _2:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvssrarni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvssrarni_d_q(_1, _2, 1); } ++// CHECK-LABEL: @xvssrarni_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h(<32 x i8> _1:%.*, <32 x i8> _2:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32u8 xvssrarni_bu_h(v32u8 _1, v32i8 _2) { return __lasx_xvssrarni_bu_h(_1, _2, 1); } ++// CHECK-LABEL: @xvssrarni_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w(<16 x i16> 
_1:%.*, <16 x i16> _2:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16u16 xvssrarni_hu_w(v16u16 _1, v16i16 _2) { return __lasx_xvssrarni_hu_w(_1, _2, 1); } ++// CHECK-LABEL: @xvssrarni_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d(<8 x i32> _1:%.*, <8 x i32> _2:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8u32 xvssrarni_wu_d(v8u32 _1, v8i32 _2) { return __lasx_xvssrarni_wu_d(_1, _2, 1); } ++// CHECK-LABEL: @xvssrarni_du_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q(<4 x i64> _1:%.*, <4 x i64> _2:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4u64 xvssrarni_du_q(v4u64 _1, v4i64 _2) { return __lasx_xvssrarni_du_q(_1, _2, 1); } ++// CHECK-LABEL: @xbnz_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lasx.xbnz.b(<32 x i8> _1:%.*) ++// CHECK-NEXT: ret i32 TMP0 ++// ++int xbnz_b(v32u8 _1) { return __lasx_xbnz_b(_1); } ++// CHECK-LABEL: @xbnz_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lasx.xbnz.d(<4 x i64> _1:%.*) ++// CHECK-NEXT: ret i32 TMP0 ++// ++int xbnz_d(v4u64 _1) { return __lasx_xbnz_d(_1); } ++// CHECK-LABEL: @xbnz_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lasx.xbnz.h(<16 x i16> _1:%.*) ++// CHECK-NEXT: ret i32 TMP0 ++// ++int xbnz_h(v16u16 _1) { return __lasx_xbnz_h(_1); } ++// CHECK-LABEL: @xbnz_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lasx.xbnz.v(<32 x i8> _1:%.*) ++// CHECK-NEXT: ret i32 TMP0 ++// ++int xbnz_v(v32u8 _1) { return __lasx_xbnz_v(_1); } ++// CHECK-LABEL: @xbnz_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lasx.xbnz.w(<8 x i32> _1:%.*) ++// CHECK-NEXT: ret i32 TMP0 ++// ++int xbnz_w(v8u32 _1) { return __lasx_xbnz_w(_1); } ++// CHECK-LABEL: @xbz_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lasx.xbz.b(<32 x i8> _1:%.*) ++// CHECK-NEXT: ret i32 TMP0 ++// ++int xbz_b(v32u8 _1) { return __lasx_xbz_b(_1); } ++// CHECK-LABEL: @xbz_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lasx.xbz.d(<4 x i64> _1:%.*) ++// CHECK-NEXT: ret i32 TMP0 ++// ++int xbz_d(v4u64 _1) { return __lasx_xbz_d(_1); } ++// CHECK-LABEL: @xbz_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lasx.xbz.h(<16 x i16> _1:%.*) ++// CHECK-NEXT: ret i32 TMP0 ++// ++int xbz_h(v16u16 _1) { return __lasx_xbz_h(_1); } ++// CHECK-LABEL: @xbz_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lasx.xbz.v(<32 x i8> _1:%.*) ++// CHECK-NEXT: ret i32 TMP0 ++// ++int xbz_v(v32u8 _1) { return __lasx_xbz_v(_1); } ++// CHECK-LABEL: @xbz_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lasx.xbz.w(<8 x i32> _1:%.*) ++// CHECK-NEXT: ret i32 TMP0 ++// ++int xbz_w(v8u32 _1) { return __lasx_xbz_w(_1); } ++// CHECK-LABEL: @xvfcmp_caf_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.caf.d(<4 x double> _1:%.*, <4 x double> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvfcmp_caf_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_caf_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_caf_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.caf.s(<8 x float> _1:%.*, <8 x float> _2:%.*) ++// CHECK-NEXT: ret 
<8 x i32> TMP0 ++// ++v8i32 xvfcmp_caf_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_caf_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_ceq_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.ceq.d(<4 x double> _1:%.*, <4 x double> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvfcmp_ceq_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_ceq_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_ceq_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.ceq.s(<8 x float> _1:%.*, <8 x float> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvfcmp_ceq_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_ceq_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cle_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cle.d(<4 x double> _1:%.*, <4 x double> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvfcmp_cle_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cle_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cle_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cle.s(<8 x float> _1:%.*, <8 x float> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvfcmp_cle_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cle_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_clt_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.clt.d(<4 x double> _1:%.*, <4 x double> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvfcmp_clt_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_clt_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_clt_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.clt.s(<8 x float> _1:%.*, <8 x float> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvfcmp_clt_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_clt_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cne_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cne.d(<4 x double> _1:%.*, <4 x double> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvfcmp_cne_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cne_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cne_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cne.s(<8 x float> _1:%.*, <8 x float> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvfcmp_cne_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cne_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cor_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cor.d(<4 x double> _1:%.*, <4 x double> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvfcmp_cor_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cor_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cor_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cor.s(<8 x float> _1:%.*, <8 x float> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvfcmp_cor_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cor_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cueq_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cueq.d(<4 x double> _1:%.*, <4 x double> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvfcmp_cueq_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cueq_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cueq_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> 
@llvm.loongarch.lasx.xvfcmp.cueq.s(<8 x float> _1:%.*, <8 x float> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvfcmp_cueq_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cueq_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cule_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cule.d(<4 x double> _1:%.*, <4 x double> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvfcmp_cule_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cule_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cule_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cule.s(<8 x float> _1:%.*, <8 x float> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvfcmp_cule_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cule_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cult_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cult.d(<4 x double> _1:%.*, <4 x double> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvfcmp_cult_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cult_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cult_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cult.s(<8 x float> _1:%.*, <8 x float> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvfcmp_cult_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cult_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cun_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cun.d(<4 x double> _1:%.*, <4 x double> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvfcmp_cun_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cun_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cune_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cune.d(<4 x double> _1:%.*, <4 x double> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvfcmp_cune_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cune_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cune_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cune.s(<8 x float> _1:%.*, <8 x float> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvfcmp_cune_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cune_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cun_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cun.s(<8 x float> _1:%.*, <8 x float> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvfcmp_cun_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cun_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_saf_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.saf.d(<4 x double> _1:%.*, <4 x double> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvfcmp_saf_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_saf_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_saf_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.saf.s(<8 x float> _1:%.*, <8 x float> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvfcmp_saf_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_saf_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_seq_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.seq.d(<4 x double> _1:%.*, <4 x double> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvfcmp_seq_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_seq_d(_1, _2); } ++// 
CHECK-LABEL: @xvfcmp_seq_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.seq.s(<8 x float> _1:%.*, <8 x float> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvfcmp_seq_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_seq_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sle_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sle.d(<4 x double> _1:%.*, <4 x double> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvfcmp_sle_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sle_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sle_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sle.s(<8 x float> _1:%.*, <8 x float> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvfcmp_sle_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sle_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_slt_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.slt.d(<4 x double> _1:%.*, <4 x double> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvfcmp_slt_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_slt_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_slt_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.slt.s(<8 x float> _1:%.*, <8 x float> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvfcmp_slt_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_slt_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sne_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sne.d(<4 x double> _1:%.*, <4 x double> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvfcmp_sne_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sne_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sne_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sne.s(<8 x float> _1:%.*, <8 x float> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvfcmp_sne_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sne_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sor_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sor.d(<4 x double> _1:%.*, <4 x double> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvfcmp_sor_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sor_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sor_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sor.s(<8 x float> _1:%.*, <8 x float> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvfcmp_sor_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sor_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sueq_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sueq.d(<4 x double> _1:%.*, <4 x double> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvfcmp_sueq_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sueq_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sueq_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sueq.s(<8 x float> _1:%.*, <8 x float> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvfcmp_sueq_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sueq_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sule_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sule.d(<4 x double> _1:%.*, <4 x double> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// 
++v4i64 xvfcmp_sule_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sule_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sule_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sule.s(<8 x float> _1:%.*, <8 x float> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvfcmp_sule_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sule_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sult_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sult.d(<4 x double> _1:%.*, <4 x double> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvfcmp_sult_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sult_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sult_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sult.s(<8 x float> _1:%.*, <8 x float> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvfcmp_sult_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sult_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sun_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sun.d(<4 x double> _1:%.*, <4 x double> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvfcmp_sun_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sun_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sune_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sune.d(<4 x double> _1:%.*, <4 x double> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvfcmp_sune_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sune_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sune_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sune.s(<8 x float> _1:%.*, <8 x float> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvfcmp_sune_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sune_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sun_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sun.s(<8 x float> _1:%.*, <8 x float> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvfcmp_sun_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sun_s(_1, _2); } ++// CHECK-LABEL: @xvpickve_d_f( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvpickve.d.f(<4 x double> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x double> TMP0 ++// ++v4f64 xvpickve_d_f(v4f64 _1) { return __lasx_xvpickve_d_f(_1, 1); } ++// CHECK-LABEL: @xvpickve_w_f( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvpickve.w.f(<8 x float> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x float> TMP0 ++// ++v8f32 xvpickve_w_f(v8f32 _1) { return __lasx_xvpickve_w_f(_1, 1); } ++// CHECK-LABEL: @xvrepli_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvrepli.b(i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvrepli_b() { return __lasx_xvrepli_b(1); } ++// CHECK-LABEL: @xvrepli_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvrepli.d(i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvrepli_d() { return __lasx_xvrepli_d(1); } ++// CHECK-LABEL: @xvrepli_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvrepli.h(i32 1) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvrepli_h() { return __lasx_xvrepli_h(1); } ++// CHECK-LABEL: @xvrepli_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: 
TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvrepli.w(i32 1) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvrepli_w() { return __lasx_xvrepli_w(1); } +diff --git a/clang/test/CodeGen/LoongArch/lasx/builtin-error.c b/clang/test/CodeGen/LoongArch/lasx/builtin-error.c +new file mode 100644 +index 000000000000..724484465769 +--- /dev/null ++++ b/clang/test/CodeGen/LoongArch/lasx/builtin-error.c +@@ -0,0 +1,1392 @@ ++// RUN: %clang_cc1 -triple loongarch64 -target-feature +lasx -verify %s ++ ++typedef signed char v32i8 __attribute__((vector_size(32), aligned(32))); ++typedef signed char v32i8_b __attribute__((vector_size(32), aligned(1))); ++typedef unsigned char v32u8 __attribute__((vector_size(32), aligned(32))); ++typedef unsigned char v32u8_b __attribute__((vector_size(32), aligned(1))); ++typedef short v16i16 __attribute__((vector_size(32), aligned(32))); ++typedef short v16i16_h __attribute__((vector_size(32), aligned(2))); ++typedef unsigned short v16u16 __attribute__((vector_size(32), aligned(32))); ++typedef unsigned short v16u16_h __attribute__((vector_size(32), aligned(2))); ++typedef int v8i32 __attribute__((vector_size(32), aligned(32))); ++typedef int v8i32_w __attribute__((vector_size(32), aligned(4))); ++typedef unsigned int v8u32 __attribute__((vector_size(32), aligned(32))); ++typedef unsigned int v8u32_w __attribute__((vector_size(32), aligned(4))); ++typedef long long v4i64 __attribute__((vector_size(32), aligned(32))); ++typedef long long v4i64_d __attribute__((vector_size(32), aligned(8))); ++typedef unsigned long long v4u64 __attribute__((vector_size(32), aligned(32))); ++typedef unsigned long long v4u64_d __attribute__((vector_size(32), aligned(8))); ++typedef float v8f32 __attribute__((vector_size(32), aligned(32))); ++typedef float v8f32_w __attribute__((vector_size(32), aligned(4))); ++typedef double v4f64 __attribute__((vector_size(32), aligned(32))); ++typedef double v4f64_d __attribute__((vector_size(32), aligned(8))); ++ ++v32i8 xvslli_b(v32i8 _1, int var) { ++ v32i8 res = __builtin_lasx_xvslli_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 7}} ++ res |= __builtin_lasx_xvslli_b(_1, 8); // expected-error {{argument value 8 is outside the valid range 0, 7}} ++ res |= __builtin_lasx_xvslli_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvslli_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvslli_h(v16i16 _1, int var) { ++ v16i16 res = __builtin_lasx_xvslli_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __builtin_lasx_xvslli_h(_1, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __builtin_lasx_xvslli_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvslli_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvslli_w(v8i32 _1, int var) { ++ v8i32 res = __builtin_lasx_xvslli_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvslli_w(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvslli_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvslli_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvslli_d(v4i64 _1, int var) { ++ v4i64 res = __builtin_lasx_xvslli_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= __builtin_lasx_xvslli_d(_1, 64); // expected-error {{argument value 64 is 
outside the valid range 0, 63}} ++ res |= __builtin_lasx_xvslli_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvslli_d' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvsrai_b(v32i8 _1, int var) { ++ v32i8 res = __builtin_lasx_xvsrai_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 7}} ++ res |= __builtin_lasx_xvsrai_b(_1, 8); // expected-error {{argument value 8 is outside the valid range 0, 7}} ++ res |= __builtin_lasx_xvsrai_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrai_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvsrai_h(v16i16 _1, int var) { ++ v16i16 res = __builtin_lasx_xvsrai_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __builtin_lasx_xvsrai_h(_1, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __builtin_lasx_xvsrai_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrai_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvsrai_w(v8i32 _1, int var) { ++ v8i32 res = __builtin_lasx_xvsrai_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvsrai_w(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvsrai_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrai_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvsrai_d(v4i64 _1, int var) { ++ v4i64 res = __builtin_lasx_xvsrai_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= __builtin_lasx_xvsrai_d(_1, 64); // expected-error {{argument value 64 is outside the valid range 0, 63}} ++ res |= __builtin_lasx_xvsrai_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrai_d' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvsrari_b(v32i8 _1, int var) { ++ v32i8 res = __builtin_lasx_xvsrari_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 7}} ++ res |= __builtin_lasx_xvsrari_b(_1, 8); // expected-error {{argument value 8 is outside the valid range 0, 7}} ++ res |= __builtin_lasx_xvsrari_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrari_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvsrari_h(v16i16 _1, int var) { ++ v16i16 res = __builtin_lasx_xvsrari_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __builtin_lasx_xvsrari_h(_1, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __builtin_lasx_xvsrari_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrari_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvsrari_w(v8i32 _1, int var) { ++ v8i32 res = __builtin_lasx_xvsrari_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvsrari_w(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvsrari_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrari_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvsrari_d(v4i64 _1, int var) { ++ v4i64 res = __builtin_lasx_xvsrari_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= __builtin_lasx_xvsrari_d(_1, 64); // expected-error {{argument value 64 is outside the valid range 0, 63}} ++ res |= 
__builtin_lasx_xvsrari_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrari_d' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvsrli_b(v32i8 _1, int var) { ++ v32i8 res = __builtin_lasx_xvsrli_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 7}} ++ res |= __builtin_lasx_xvsrli_b(_1, 8); // expected-error {{argument value 8 is outside the valid range 0, 7}} ++ res |= __builtin_lasx_xvsrli_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrli_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvsrli_h(v16i16 _1, int var) { ++ v16i16 res = __builtin_lasx_xvsrli_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __builtin_lasx_xvsrli_h(_1, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __builtin_lasx_xvsrli_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrli_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvsrli_w(v8i32 _1, int var) { ++ v8i32 res = __builtin_lasx_xvsrli_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvsrli_w(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvsrli_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrli_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvsrli_d(v4i64 _1, int var) { ++ v4i64 res = __builtin_lasx_xvsrli_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= __builtin_lasx_xvsrli_d(_1, 64); // expected-error {{argument value 64 is outside the valid range 0, 63}} ++ res |= __builtin_lasx_xvsrli_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrli_d' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvsrlri_b(v32i8 _1, int var) { ++ v32i8 res = __builtin_lasx_xvsrlri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 7}} ++ res |= __builtin_lasx_xvsrlri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range 0, 7}} ++ res |= __builtin_lasx_xvsrlri_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrlri_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvsrlri_h(v16i16 _1, int var) { ++ v16i16 res = __builtin_lasx_xvsrlri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __builtin_lasx_xvsrlri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __builtin_lasx_xvsrlri_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrlri_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvsrlri_w(v8i32 _1, int var) { ++ v8i32 res = __builtin_lasx_xvsrlri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvsrlri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvsrlri_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsrlri_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvsrlri_d(v4i64 _1, int var) { ++ v4i64 res = __builtin_lasx_xvsrlri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= __builtin_lasx_xvsrlri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range 0, 63}} ++ res |= __builtin_lasx_xvsrlri_d(_1, var); // 
expected-error {{argument to '__builtin_lasx_xvsrlri_d' must be a constant integer}} ++ return res; ++} ++ ++v32u8 xvbitclri_b(v32u8 _1, int var) { ++ v32u8 res = __builtin_lasx_xvbitclri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 7}} ++ res |= __builtin_lasx_xvbitclri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range 0, 7}} ++ res |= __builtin_lasx_xvbitclri_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitclri_b' must be a constant integer}} ++ return res; ++} ++ ++v16u16 xvbitclri_h(v16u16 _1, int var) { ++ v16u16 res = __builtin_lasx_xvbitclri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __builtin_lasx_xvbitclri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __builtin_lasx_xvbitclri_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitclri_h' must be a constant integer}} ++ return res; ++} ++ ++v8u32 xvbitclri_w(v8u32 _1, int var) { ++ v8u32 res = __builtin_lasx_xvbitclri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvbitclri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvbitclri_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitclri_w' must be a constant integer}} ++ return res; ++} ++ ++v4u64 xvbitclri_d(v4u64 _1, int var) { ++ v4u64 res = __builtin_lasx_xvbitclri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= __builtin_lasx_xvbitclri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range 0, 63}} ++ res |= __builtin_lasx_xvbitclri_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitclri_d' must be a constant integer}} ++ return res; ++} ++ ++v32u8 xvbitseti_b(v32u8 _1, int var) { ++ v32u8 res = __builtin_lasx_xvbitseti_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 7}} ++ res |= __builtin_lasx_xvbitseti_b(_1, 8); // expected-error {{argument value 8 is outside the valid range 0, 7}} ++ res |= __builtin_lasx_xvbitseti_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitseti_b' must be a constant integer}} ++ return res; ++} ++ ++v16u16 xvbitseti_h(v16u16 _1, int var) { ++ v16u16 res = __builtin_lasx_xvbitseti_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __builtin_lasx_xvbitseti_h(_1, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __builtin_lasx_xvbitseti_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitseti_h' must be a constant integer}} ++ return res; ++} ++ ++v8u32 xvbitseti_w(v8u32 _1, int var) { ++ v8u32 res = __builtin_lasx_xvbitseti_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvbitseti_w(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvbitseti_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitseti_w' must be a constant integer}} ++ return res; ++} ++ ++v4u64 xvbitseti_d(v4u64 _1, int var) { ++ v4u64 res = __builtin_lasx_xvbitseti_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= __builtin_lasx_xvbitseti_d(_1, 64); // expected-error {{argument value 64 is outside the valid range 0, 63}} 
++ res |= __builtin_lasx_xvbitseti_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitseti_d' must be a constant integer}} ++ return res; ++} ++ ++v32u8 xvbitrevi_b(v32u8 _1, int var) { ++ v32u8 res = __builtin_lasx_xvbitrevi_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 7}} ++ res |= __builtin_lasx_xvbitrevi_b(_1, 8); // expected-error {{argument value 8 is outside the valid range 0, 7}} ++ res |= __builtin_lasx_xvbitrevi_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitrevi_b' must be a constant integer}} ++ return res; ++} ++ ++v16u16 xvbitrevi_h(v16u16 _1, int var) { ++ v16u16 res = __builtin_lasx_xvbitrevi_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __builtin_lasx_xvbitrevi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __builtin_lasx_xvbitrevi_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitrevi_h' must be a constant integer}} ++ return res; ++} ++ ++v8u32 xvbitrevi_w(v8u32 _1, int var) { ++ v8u32 res = __builtin_lasx_xvbitrevi_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvbitrevi_w(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvbitrevi_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitrevi_w' must be a constant integer}} ++ return res; ++} ++ ++v4u64 xvbitrevi_d(v4u64 _1, int var) { ++ v4u64 res = __builtin_lasx_xvbitrevi_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= __builtin_lasx_xvbitrevi_d(_1, 64); // expected-error {{argument value 64 is outside the valid range 0, 63}} ++ res |= __builtin_lasx_xvbitrevi_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvbitrevi_d' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvaddi_bu(v32i8 _1, int var) { ++ v32i8 res = __builtin_lasx_xvaddi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvaddi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvaddi_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvaddi_bu' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvaddi_hu(v16i16 _1, int var) { ++ v16i16 res = __builtin_lasx_xvaddi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvaddi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvaddi_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvaddi_hu' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvaddi_wu(v8i32 _1, int var) { ++ v8i32 res = __builtin_lasx_xvaddi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvaddi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvaddi_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvaddi_wu' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvaddi_du(v4i64 _1, int var) { ++ v4i64 res = __builtin_lasx_xvaddi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvaddi_du(_1, 32); // expected-error {{argument value 32 is outside the 
valid range 0, 31}} ++ res |= __builtin_lasx_xvaddi_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvaddi_du' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvsubi_bu(v32i8 _1, int var) { ++ v32i8 res = __builtin_lasx_xvsubi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvsubi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvsubi_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsubi_bu' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvsubi_hu(v16i16 _1, int var) { ++ v16i16 res = __builtin_lasx_xvsubi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvsubi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvsubi_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsubi_hu' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvsubi_wu(v8i32 _1, int var) { ++ v8i32 res = __builtin_lasx_xvsubi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvsubi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvsubi_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsubi_wu' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvsubi_du(v4i64 _1, int var) { ++ v4i64 res = __builtin_lasx_xvsubi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvsubi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvsubi_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvsubi_du' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvmaxi_b(v32i8 _1, int var) { ++ v32i8 res = __builtin_lasx_xvmaxi_b(_1, -17); // expected-error {{argument value -17 is outside the valid range -16, 15}} ++ res |= __builtin_lasx_xvmaxi_b(_1, 16); // expected-error {{argument value 16 is outside the valid range -16, 15}} ++ res |= __builtin_lasx_xvmaxi_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvmaxi_h(v16i16 _1, int var) { ++ v16i16 res = __builtin_lasx_xvmaxi_h(_1, -17); // expected-error {{argument value -17 is outside the valid range -16, 15}} ++ res |= __builtin_lasx_xvmaxi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range -16, 15}} ++ res |= __builtin_lasx_xvmaxi_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvmaxi_w(v8i32 _1, int var) { ++ v8i32 res = __builtin_lasx_xvmaxi_w(_1, -17); // expected-error {{argument value -17 is outside the valid range -16, 15}} ++ res |= __builtin_lasx_xvmaxi_w(_1, 16); // expected-error {{argument value 16 is outside the valid range -16, 15}} ++ res |= __builtin_lasx_xvmaxi_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvmaxi_d(v4i64 _1, int var) { ++ v4i64 res = __builtin_lasx_xvmaxi_d(_1, -17); // expected-error {{argument value -17 is outside the valid range -16, 15}} ++ res |= __builtin_lasx_xvmaxi_d(_1, 16); // expected-error {{argument value 16 is outside the valid range -16, 15}} ++ res |= 
__builtin_lasx_xvmaxi_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_d' must be a constant integer}} ++ return res; ++} ++ ++v32u8 xvmaxi_bu(v32u8 _1, int var) { ++ v32u8 res = __builtin_lasx_xvmaxi_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvmaxi_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvmaxi_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_bu' must be a constant integer}} ++ return res; ++} ++ ++v16u16 xvmaxi_hu(v16u16 _1, int var) { ++ v16u16 res = __builtin_lasx_xvmaxi_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvmaxi_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvmaxi_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_hu' must be a constant integer}} ++ return res; ++} ++ ++v8u32 xvmaxi_wu(v8u32 _1, int var) { ++ v8u32 res = __builtin_lasx_xvmaxi_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvmaxi_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvmaxi_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_wu' must be a constant integer}} ++ return res; ++} ++ ++v4u64 xvmaxi_du(v4u64 _1, int var) { ++ v4u64 res = __builtin_lasx_xvmaxi_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvmaxi_du(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvmaxi_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvmaxi_du' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvmini_b(v32i8 _1, int var) { ++ v32i8 res = __builtin_lasx_xvmini_b(_1, -17); // expected-error {{argument value -17 is outside the valid range -16, 15}} ++ res |= __builtin_lasx_xvmini_b(_1, 16); // expected-error {{argument value 16 is outside the valid range -16, 15}} ++ res |= __builtin_lasx_xvmini_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvmini_h(v16i16 _1, int var) { ++ v16i16 res = __builtin_lasx_xvmini_h(_1, -17); // expected-error {{argument value -17 is outside the valid range -16, 15}} ++ res |= __builtin_lasx_xvmini_h(_1, 16); // expected-error {{argument value 16 is outside the valid range -16, 15}} ++ res |= __builtin_lasx_xvmini_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvmini_w(v8i32 _1, int var) { ++ v8i32 res = __builtin_lasx_xvmini_w(_1, -17); // expected-error {{argument value -17 is outside the valid range -16, 15}} ++ res |= __builtin_lasx_xvmini_w(_1, 16); // expected-error {{argument value 16 is outside the valid range -16, 15}} ++ res |= __builtin_lasx_xvmini_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvmini_d(v4i64 _1, int var) { ++ v4i64 res = __builtin_lasx_xvmini_d(_1, -17); // expected-error {{argument value -17 is outside the valid range -16, 15}} ++ res |= __builtin_lasx_xvmini_d(_1, 16); // expected-error {{argument value 16 is outside the valid range -16, 15}} ++ res |= __builtin_lasx_xvmini_d(_1, var); //
expected-error {{argument to '__builtin_lasx_xvmini_d' must be a constant integer}} ++ return res; ++} ++ ++v32u8 xvmini_bu(v32u8 _1, int var) { ++ v32u8 res = __builtin_lasx_xvmini_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvmini_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvmini_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_bu' must be a constant integer}} ++ return res; ++} ++ ++v16u16 xvmini_hu(v16u16 _1, int var) { ++ v16u16 res = __builtin_lasx_xvmini_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvmini_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvmini_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_hu' must be a constant integer}} ++ return res; ++} ++ ++v8u32 xvmini_wu(v8u32 _1, int var) { ++ v8u32 res = __builtin_lasx_xvmini_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvmini_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvmini_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_wu' must be a constant integer}} ++ return res; ++} ++ ++v4u64 xvmini_du(v4u64 _1, int var) { ++ v4u64 res = __builtin_lasx_xvmini_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvmini_du(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvmini_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvmini_du' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvseqi_b(v32i8 _1, int var) { ++ v32i8 res = __builtin_lasx_xvseqi_b(_1, -17); // expected-error {{argument value -17 is outside the valid range -16, 15}} ++ res |= __builtin_lasx_xvseqi_b(_1, 16); // expected-error {{argument value 16 is outside the valid range -16, 15}} ++ res |= __builtin_lasx_xvseqi_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvseqi_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvseqi_h(v16i16 _1, int var) { ++ v16i16 res = __builtin_lasx_xvseqi_h(_1, -17); // expected-error {{argument value -17 is outside the valid range -16, 15}} ++ res |= __builtin_lasx_xvseqi_h(_1, 16); // expected-error {{argument value 16 is outside the valid range -16, 15}} ++ res |= __builtin_lasx_xvseqi_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvseqi_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvseqi_w(v8i32 _1, int var) { ++ v8i32 res = __builtin_lasx_xvseqi_w(_1, -17); // expected-error {{argument value -17 is outside the valid range -16, 15}} ++ res |= __builtin_lasx_xvseqi_w(_1, 16); // expected-error {{argument value 16 is outside the valid range -16, 15}} ++ res |= __builtin_lasx_xvseqi_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvseqi_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvseqi_d(v4i64 _1, int var) { ++ v4i64 res = __builtin_lasx_xvseqi_d(_1, -17); // expected-error {{argument value -17 is outside the valid range -16, 15}} ++ res |= __builtin_lasx_xvseqi_d(_1, 16); // expected-error {{argument value 16 is outside the valid range -16, 15}} ++ res |= __builtin_lasx_xvseqi_d(_1, var); // expected-error {{argument to 
'__builtin_lasx_xvseqi_d' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvslti_b(v32i8 _1, int var) { ++ v32i8 res = __builtin_lasx_xvslti_b(_1, -17); // expected-error {{argument value -17 is outside the valid range -16, 15}} ++ res |= __builtin_lasx_xvslti_b(_1, 16); // expected-error {{argument value 16 is outside the valid range -16, 15}} ++ res |= __builtin_lasx_xvslti_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvslti_h(v16i16 _1, int var) { ++ v16i16 res = __builtin_lasx_xvslti_h(_1, -17); // expected-error {{argument value -17 is outside the valid range -16, 15}} ++ res |= __builtin_lasx_xvslti_h(_1, 16); // expected-error {{argument value 16 is outside the valid range -16, 15}} ++ res |= __builtin_lasx_xvslti_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvslti_w(v8i32 _1, int var) { ++ v8i32 res = __builtin_lasx_xvslti_w(_1, -17); // expected-error {{argument value -17 is outside the valid range -16, 15}} ++ res |= __builtin_lasx_xvslti_w(_1, 16); // expected-error {{argument value 16 is outside the valid range -16, 15}} ++ res |= __builtin_lasx_xvslti_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvslti_d(v4i64 _1, int var) { ++ v4i64 res = __builtin_lasx_xvslti_d(_1, -17); // expected-error {{argument value -17 is outside the valid range -16, 15}} ++ res |= __builtin_lasx_xvslti_d(_1, 16); // expected-error {{argument value 16 is outside the valid range -16, 15}} ++ res |= __builtin_lasx_xvslti_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_d' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvslti_bu(v32u8 _1, int var) { ++ v32i8 res = __builtin_lasx_xvslti_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvslti_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvslti_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_bu' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvslti_hu(v16u16 _1, int var) { ++ v16i16 res = __builtin_lasx_xvslti_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvslti_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvslti_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_hu' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvslti_wu(v8u32 _1, int var) { ++ v8i32 res = __builtin_lasx_xvslti_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvslti_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvslti_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_wu' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvslti_du(v4u64 _1, int var) { ++ v4i64 res = __builtin_lasx_xvslti_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvslti_du(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvslti_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvslti_du' must be a constant 
integer}} ++ return res; ++} ++ ++v32i8 xvslei_b(v32i8 _1, int var) { ++ v32i8 res = __builtin_lasx_xvslei_b(_1, -17); // expected-error {{argument value -17 is outside the valid range -16, 15}} ++ res |= __builtin_lasx_xvslei_b(_1, 16); // expected-error {{argument value 16 is outside the valid range -16, 15}} ++ res |= __builtin_lasx_xvslei_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvslei_h(v16i16 _1, int var) { ++ v16i16 res = __builtin_lasx_xvslei_h(_1, -17); // expected-error {{argument value -17 is outside the valid range -16, 15}} ++ res |= __builtin_lasx_xvslei_h(_1, 16); // expected-error {{argument value 16 is outside the valid range -16, 15}} ++ res |= __builtin_lasx_xvslei_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvslei_w(v8i32 _1, int var) { ++ v8i32 res = __builtin_lasx_xvslei_w(_1, -17); // expected-error {{argument value -17 is outside the valid range -16, 15}} ++ res |= __builtin_lasx_xvslei_w(_1, 16); // expected-error {{argument value 16 is outside the valid range -16, 15}} ++ res |= __builtin_lasx_xvslei_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvslei_d(v4i64 _1, int var) { ++ v4i64 res = __builtin_lasx_xvslei_d(_1, -17); // expected-error {{argument value -17 is outside the valid range -16, 15}} ++ res |= __builtin_lasx_xvslei_d(_1, 16); // expected-error {{argument value 16 is outside the valid range -16, 15}} ++ res |= __builtin_lasx_xvslei_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_d' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvslei_bu(v32u8 _1, int var) { ++ v32i8 res = __builtin_lasx_xvslei_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvslei_bu(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvslei_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_bu' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvslei_hu(v16u16 _1, int var) { ++ v16i16 res = __builtin_lasx_xvslei_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvslei_hu(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvslei_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_hu' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvslei_wu(v8u32 _1, int var) { ++ v8i32 res = __builtin_lasx_xvslei_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvslei_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvslei_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_wu' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvslei_du(v4u64 _1, int var) { ++ v4i64 res = __builtin_lasx_xvslei_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvslei_du(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvslei_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvslei_du' must be a constant integer}} ++ return res; ++} ++ ++v32i8 
xvsat_b(v32i8 _1, int var) { ++ v32i8 res = __builtin_lasx_xvsat_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 7}} ++ res |= __builtin_lasx_xvsat_b(_1, 8); // expected-error {{argument value 8 is outside the valid range 0, 7}} ++ res |= __builtin_lasx_xvsat_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvsat_h(v16i16 _1, int var) { ++ v16i16 res = __builtin_lasx_xvsat_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __builtin_lasx_xvsat_h(_1, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __builtin_lasx_xvsat_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvsat_w(v8i32 _1, int var) { ++ v8i32 res = __builtin_lasx_xvsat_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvsat_w(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvsat_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvsat_d(v4i64 _1, int var) { ++ v4i64 res = __builtin_lasx_xvsat_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= __builtin_lasx_xvsat_d(_1, 64); // expected-error {{argument value 64 is outside the valid range 0, 63}} ++ res |= __builtin_lasx_xvsat_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_d' must be a constant integer}} ++ return res; ++} ++ ++v32u8 xvsat_bu(v32u8 _1, int var) { ++ v32u8 res = __builtin_lasx_xvsat_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 7}} ++ res |= __builtin_lasx_xvsat_bu(_1, 8); // expected-error {{argument value 8 is outside the valid range 0, 7}} ++ res |= __builtin_lasx_xvsat_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_bu' must be a constant integer}} ++ return res; ++} ++ ++v16u16 xvsat_hu(v16u16 _1, int var) { ++ v16u16 res = __builtin_lasx_xvsat_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __builtin_lasx_xvsat_hu(_1, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __builtin_lasx_xvsat_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_hu' must be a constant integer}} ++ return res; ++} ++ ++v8u32 xvsat_wu(v8u32 _1, int var) { ++ v8u32 res = __builtin_lasx_xvsat_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvsat_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvsat_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_wu' must be a constant integer}} ++ return res; ++} ++ ++v4u64 xvsat_du(v4u64 _1, int var) { ++ v4u64 res = __builtin_lasx_xvsat_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= __builtin_lasx_xvsat_du(_1, 64); // expected-error {{argument value 64 is outside the valid range 0, 63}} ++ res |= __builtin_lasx_xvsat_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvsat_du' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvrepl128vei_b(v32i8 _1, int var) { ++ v32i8 res = __builtin_lasx_xvrepl128vei_b(_1, 
-1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __builtin_lasx_xvrepl128vei_b(_1, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __builtin_lasx_xvrepl128vei_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvrepl128vei_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvrepl128vei_h(v16i16 _1, int var) { ++ v16i16 res = __builtin_lasx_xvrepl128vei_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 7}} ++ res |= __builtin_lasx_xvrepl128vei_h(_1, 8); // expected-error {{argument value 8 is outside the valid range 0, 7}} ++ res |= __builtin_lasx_xvrepl128vei_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvrepl128vei_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvrepl128vei_w(v8i32 _1, int var) { ++ v8i32 res = __builtin_lasx_xvrepl128vei_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 3}} ++ res |= __builtin_lasx_xvrepl128vei_w(_1, 4); // expected-error {{argument value 4 is outside the valid range 0, 3}} ++ res |= __builtin_lasx_xvrepl128vei_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvrepl128vei_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvrepl128vei_d(v4i64 _1, int var) { ++ v4i64 res = __builtin_lasx_xvrepl128vei_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 1}} ++ res |= __builtin_lasx_xvrepl128vei_d(_1, 2); // expected-error {{argument value 2 is outside the valid range 0, 1}} ++ res |= __builtin_lasx_xvrepl128vei_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvrepl128vei_d' must be a constant integer}} ++ return res; ++} ++ ++v32u8 xvandi_b(v32u8 _1, int var) { ++ v32u8 res = __builtin_lasx_xvandi_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 255}} ++ res |= __builtin_lasx_xvandi_b(_1, 256); // expected-error {{argument value 256 is outside the valid range 0, 255}} ++ res |= __builtin_lasx_xvandi_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvandi_b' must be a constant integer}} ++ return res; ++} ++ ++v32u8 xvori_b(v32u8 _1, int var) { ++ v32u8 res = __builtin_lasx_xvori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 255}} ++ res |= __builtin_lasx_xvori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range 0, 255}} ++ res |= __builtin_lasx_xvori_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvori_b' must be a constant integer}} ++ return res; ++} ++ ++v32u8 xvnori_b(v32u8 _1, int var) { ++ v32u8 res = __builtin_lasx_xvnori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 255}} ++ res |= __builtin_lasx_xvnori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range 0, 255}} ++ res |= __builtin_lasx_xvnori_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvnori_b' must be a constant integer}} ++ return res; ++} ++ ++v32u8 xvxori_b(v32u8 _1, int var) { ++ v32u8 res = __builtin_lasx_xvxori_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 255}} ++ res |= __builtin_lasx_xvxori_b(_1, 256); // expected-error {{argument value 256 is outside the valid range 0, 255}} ++ res |= __builtin_lasx_xvxori_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvxori_b' must be a constant integer}} ++ return res; ++} ++ ++v32u8 xvbitseli_b(v32u8 _1, v32u8 
_2, int var) { ++ v32u8 res = __builtin_lasx_xvbitseli_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 255}} ++ res |= __builtin_lasx_xvbitseli_b(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range 0, 255}} ++ res |= __builtin_lasx_xvbitseli_b(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvbitseli_b' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvshuf4i_b(v32i8 _1, int var) { ++ v32i8 res = __builtin_lasx_xvshuf4i_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 255}} ++ res |= __builtin_lasx_xvshuf4i_b(_1, 256); // expected-error {{argument value 256 is outside the valid range 0, 255}} ++ res |= __builtin_lasx_xvshuf4i_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvshuf4i_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvshuf4i_h(v16i16 _1, int var) { ++ v16i16 res = __builtin_lasx_xvshuf4i_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 255}} ++ res |= __builtin_lasx_xvshuf4i_h(_1, 256); // expected-error {{argument value 256 is outside the valid range 0, 255}} ++ res |= __builtin_lasx_xvshuf4i_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvshuf4i_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvshuf4i_w(v8i32 _1, int var) { ++ v8i32 res = __builtin_lasx_xvshuf4i_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 255}} ++ res |= __builtin_lasx_xvshuf4i_w(_1, 256); // expected-error {{argument value 256 is outside the valid range 0, 255}} ++ res |= __builtin_lasx_xvshuf4i_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvshuf4i_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvshuf4i_d(v4i64 _1, v4i64 _2, int var) { ++ v4i64 res = __builtin_lasx_xvshuf4i_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 255}} ++ res |= __builtin_lasx_xvshuf4i_d(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range 0, 255}} ++ res |= __builtin_lasx_xvshuf4i_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvshuf4i_d' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvpermi_w(v8i32 _1, v8i32 _2, int var) { ++ v8i32 res = __builtin_lasx_xvpermi_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 255}} ++ res |= __builtin_lasx_xvpermi_w(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range 0, 255}} ++ res |= __builtin_lasx_xvpermi_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvpermi_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvpermi_d(v4i64 _1, int var) { ++ v4i64 res = __builtin_lasx_xvpermi_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 255}} ++ res |= __builtin_lasx_xvpermi_d(_1, 256); // expected-error {{argument value 256 is outside the valid range 0, 255}} ++ res |= __builtin_lasx_xvpermi_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvpermi_d' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvpermi_q(v32i8 _1, v32i8 _2, int var) { ++ v32i8 res = __builtin_lasx_xvpermi_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 255}} ++ res |= __builtin_lasx_xvpermi_q(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range 0, 255}} ++ res |= __builtin_lasx_xvpermi_q(_1, _2, var); // expected-error 
{{argument to '__builtin_lasx_xvpermi_q' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvsllwil_h_b(v32i8 _1, int var) { ++ v16i16 res = __builtin_lasx_xvsllwil_h_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 7}} ++ res |= __builtin_lasx_xvsllwil_h_b(_1, 8); // expected-error {{argument value 8 is outside the valid range 0, 7}} ++ res |= __builtin_lasx_xvsllwil_h_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_h_b' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvsllwil_w_h(v16i16 _1, int var) { ++ v8i32 res = __builtin_lasx_xvsllwil_w_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __builtin_lasx_xvsllwil_w_h(_1, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __builtin_lasx_xvsllwil_w_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_w_h' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvsllwil_d_w(v8i32 _1, int var) { ++ v4i64 res = __builtin_lasx_xvsllwil_d_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvsllwil_d_w(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvsllwil_d_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_d_w' must be a constant integer}} ++ return res; ++} ++ ++v16u16 xvsllwil_hu_bu(v32u8 _1, int var) { ++ v16u16 res = __builtin_lasx_xvsllwil_hu_bu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 7}} ++ res |= __builtin_lasx_xvsllwil_hu_bu(_1, 8); // expected-error {{argument value 8 is outside the valid range 0, 7}} ++ res |= __builtin_lasx_xvsllwil_hu_bu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_hu_bu' must be a constant integer}} ++ return res; ++} ++ ++v8u32 xvsllwil_wu_hu(v16u16 _1, int var) { ++ v8u32 res = __builtin_lasx_xvsllwil_wu_hu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __builtin_lasx_xvsllwil_wu_hu(_1, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __builtin_lasx_xvsllwil_wu_hu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_wu_hu' must be a constant integer}} ++ return res; ++} ++ ++v4u64 xvsllwil_du_wu(v8u32 _1, int var) { ++ v4u64 res = __builtin_lasx_xvsllwil_du_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvsllwil_du_wu(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvsllwil_du_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvsllwil_du_wu' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvfrstpi_b(v32i8 _1, v32i8 _2, int var) { ++ v32i8 res = __builtin_lasx_xvfrstpi_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvfrstpi_b(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvfrstpi_b(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvfrstpi_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvfrstpi_h(v16i16 _1, v16i16 _2, int var) { ++ v16i16 res = __builtin_lasx_xvfrstpi_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvfrstpi_h(_1, 
_2, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvfrstpi_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvfrstpi_h' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvbsrl_v(v32i8 _1, int var) { ++ v32i8 res = __builtin_lasx_xvbsrl_v(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvbsrl_v(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvbsrl_v(_1, var); // expected-error {{argument to '__builtin_lasx_xvbsrl_v' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvbsll_v(v32i8 _1, int var) { ++ v32i8 res = __builtin_lasx_xvbsll_v(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvbsll_v(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvbsll_v(_1, var); // expected-error {{argument to '__builtin_lasx_xvbsll_v' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvextrins_b(v32i8 _1, v32i8 _2, int var) { ++ v32i8 res = __builtin_lasx_xvextrins_b(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 255}} ++ res |= __builtin_lasx_xvextrins_b(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range 0, 255}} ++ res |= __builtin_lasx_xvextrins_b(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvextrins_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvextrins_h(v16i16 _1, v16i16 _2, int var) { ++ v16i16 res = __builtin_lasx_xvextrins_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 255}} ++ res |= __builtin_lasx_xvextrins_h(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range 0, 255}} ++ res |= __builtin_lasx_xvextrins_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvextrins_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvextrins_w(v8i32 _1, v8i32 _2, int var) { ++ v8i32 res = __builtin_lasx_xvextrins_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 255}} ++ res |= __builtin_lasx_xvextrins_w(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range 0, 255}} ++ res |= __builtin_lasx_xvextrins_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvextrins_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvextrins_d(v4i64 _1, v4i64 _2, int var) { ++ v4i64 res = __builtin_lasx_xvextrins_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 255}} ++ res |= __builtin_lasx_xvextrins_d(_1, _2, 256); // expected-error {{argument value 256 is outside the valid range 0, 255}} ++ res |= __builtin_lasx_xvextrins_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvextrins_d' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvld(void *_1, int var) { ++ v32i8 res = __builtin_lasx_xvld(_1, -2049); // expected-error {{argument value -2049 is outside the valid range -2048, 2047}} ++ res |= __builtin_lasx_xvld(_1, 2048); // expected-error {{argument value 2048 is outside the valid range -2048, 2047}} ++ res |= __builtin_lasx_xvld(_1, var); // expected-error {{argument to '__builtin_lasx_xvld' must be a constant integer}} ++ return res; ++} ++ ++void xvst(v32i8 _1, void *_2, int var) { ++ __builtin_lasx_xvst(_1, _2, -2049); // 
expected-error {{argument value -2049 is outside the valid range -2048, 2047}} ++ __builtin_lasx_xvst(_1, _2, 2048); // expected-error {{argument value 2048 is outside the valid range -2048, 2047}} ++ __builtin_lasx_xvst(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvst' must be a constant integer}} ++} ++ ++void xvstelm_b(v32i8 _1, void * _2, int var) { ++ __builtin_lasx_xvstelm_b(_1, _2, -129, 1); // expected-error {{argument value -129 is outside the valid range -128, 127}} ++ __builtin_lasx_xvstelm_b(_1, _2, 128, 1); // expected-error {{argument value 128 is outside the valid range -128, 127}} ++ __builtin_lasx_xvstelm_b(_1, _2, var, 1); // expected-error {{argument to '__builtin_lasx_xvstelm_b' must be a constant integer}} ++} ++ ++void xvstelm_h(v16i16 _1, void * _2, int var) { ++ __builtin_lasx_xvstelm_h(_1, _2, -258, 1); // expected-error {{argument value -258 is outside the valid range -256, 254}} ++ __builtin_lasx_xvstelm_h(_1, _2, 256, 1); // expected-error {{argument value 256 is outside the valid range -256, 254}} ++ __builtin_lasx_xvstelm_h(_1, _2, var, 1); // expected-error {{argument to '__builtin_lasx_xvstelm_h' must be a constant integer}} ++} ++ ++void xvstelm_w(v8i32 _1, void * _2, int var) { ++ __builtin_lasx_xvstelm_w(_1, _2, -516, 1); // expected-error {{argument value -516 is outside the valid range -512, 508}} ++ __builtin_lasx_xvstelm_w(_1, _2, 512, 1); // expected-error {{argument value 512 is outside the valid range -512, 508}} ++ __builtin_lasx_xvstelm_w(_1, _2, var, 1); // expected-error {{argument to '__builtin_lasx_xvstelm_w' must be a constant integer}} ++} ++ ++void xvstelm_d(v4i64 _1, void * _2, int var) { ++ __builtin_lasx_xvstelm_d(_1, _2, -1032, 1); // expected-error {{argument value -1032 is outside the valid range -1024, 1016}} ++ __builtin_lasx_xvstelm_d(_1, _2, 1024, 1); // expected-error {{argument value 1024 is outside the valid range -1024, 1016}} ++ __builtin_lasx_xvstelm_d(_1, _2, var, 1); // expected-error {{argument to '__builtin_lasx_xvstelm_d' must be a constant integer}} ++} ++ ++void xvstelm_b_idx(v32i8 _1, void * _2, int var) { ++ __builtin_lasx_xvstelm_b(_1, _2, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ __builtin_lasx_xvstelm_b(_1, _2, 1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ __builtin_lasx_xvstelm_b(_1, _2, 1, var); // expected-error {{argument to '__builtin_lasx_xvstelm_b' must be a constant integer}} ++} ++ ++void xvstelm_h_idx(v16i16 _1, void * _2, int var) { ++ __builtin_lasx_xvstelm_h(_1, _2, 2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ __builtin_lasx_xvstelm_h(_1, _2, 2, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ __builtin_lasx_xvstelm_h(_1, _2, 2, var); // expected-error {{argument to '__builtin_lasx_xvstelm_h' must be a constant integer}} ++} ++ ++void xvstelm_w_idx(v8i32 _1, void * _2, int var) { ++ __builtin_lasx_xvstelm_w(_1, _2, 4, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 7}} ++ __builtin_lasx_xvstelm_w(_1, _2, 4, 8); // expected-error {{argument value 8 is outside the valid range 0, 7}} ++ __builtin_lasx_xvstelm_w(_1, _2, 4, var); // expected-error {{argument to '__builtin_lasx_xvstelm_w' must be a constant integer}} ++} ++ ++void xvstelm_d_idx(v4i64 _1, void * _2, int var) { ++ __builtin_lasx_xvstelm_d(_1, _2, 8, -1); // expected-error {{argument value 4294967295 is outside the valid range 
0, 3}} ++ __builtin_lasx_xvstelm_d(_1, _2, 8, 4); // expected-error {{argument value 4 is outside the valid range 0, 3}} ++ __builtin_lasx_xvstelm_d(_1, _2, 8, var); // expected-error {{argument to '__builtin_lasx_xvstelm_d' must be a constant integer}} ++} ++ ++v8i32 xvinsve0_w(v8i32 _1, v8i32 _2, int var) { ++ v8i32 res = __builtin_lasx_xvinsve0_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 7}} ++ res |= __builtin_lasx_xvinsve0_w(_1, _2, 8); // expected-error {{argument value 8 is outside the valid range 0, 7}} ++ res |= __builtin_lasx_xvinsve0_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvinsve0_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvinsve0_d(v4i64 _1, v4i64 _2, int var) { ++ v4i64 res = __builtin_lasx_xvinsve0_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 3}} ++ res |= __builtin_lasx_xvinsve0_d(_1, _2, 4); // expected-error {{argument value 4 is outside the valid range 0, 3}} ++ res |= __builtin_lasx_xvinsve0_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvinsve0_d' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvpickve_w(v8i32 _1, int var) { ++ v8i32 res = __builtin_lasx_xvpickve_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 7}} ++ res |= __builtin_lasx_xvpickve_w(_1, 8); // expected-error {{argument value 8 is outside the valid range 0, 7}} ++ res |= __builtin_lasx_xvpickve_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvpickve_d(v4i64 _1, int var) { ++ v4i64 res = __builtin_lasx_xvpickve_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 3}} ++ res |= __builtin_lasx_xvpickve_d(_1, 4); // expected-error {{argument value 4 is outside the valid range 0, 3}} ++ res |= __builtin_lasx_xvpickve_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve_d' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvldi(int var) { ++ v4i64 res = __builtin_lasx_xvldi(-4097); // expected-error {{argument value -4097 is outside the valid range -4096, 4095}} ++ res |= __builtin_lasx_xvldi(4096); // expected-error {{argument value 4096 is outside the valid range -4096, 4095}} ++ res |= __builtin_lasx_xvldi(var); // expected-error {{argument to '__builtin_lasx_xvldi' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvinsgr2vr_w(v8i32 _1, int var) { ++ v8i32 res = __builtin_lasx_xvinsgr2vr_w(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 7}} ++ res |= __builtin_lasx_xvinsgr2vr_w(_1, 1, 8); // expected-error {{argument value 8 is outside the valid range 0, 7}} ++ res |= __builtin_lasx_xvinsgr2vr_w(_1, 1, var); // expected-error {{argument to '__builtin_lasx_xvinsgr2vr_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvinsgr2vr_d(v4i64 _1, int var) { ++ v4i64 res = __builtin_lasx_xvinsgr2vr_d(_1, 1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 3}} ++ res |= __builtin_lasx_xvinsgr2vr_d(_1, 1, 4); // expected-error {{argument value 4 is outside the valid range 0, 3}} ++ res |= __builtin_lasx_xvinsgr2vr_d(_1, 1, var); // expected-error {{argument to '__builtin_lasx_xvinsgr2vr_d' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvldrepl_b(void *_1, int var) { ++ v32i8 res = __builtin_lasx_xvldrepl_b(_1, -2049); // expected-error {{argument value -2049 is outside 
the valid range -2048, 2047}} ++ res |= __builtin_lasx_xvldrepl_b(_1, 2048); // expected-error {{argument value 2048 is outside the valid range -2048, 2047}} ++ res |= __builtin_lasx_xvldrepl_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvldrepl_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvldrepl_h(void *_1, int var) { ++ v16i16 res = __builtin_lasx_xvldrepl_h(_1, -2050); // expected-error {{argument value -2050 is outside the valid range -2048, 2046}} ++ res |= __builtin_lasx_xvldrepl_h(_1, 2048); // expected-error {{argument value 2048 is outside the valid range -2048, 2046}} ++ res |= __builtin_lasx_xvldrepl_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvldrepl_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvldrepl_w(void *_1, int var) { ++ v8i32 res = __builtin_lasx_xvldrepl_w(_1, -2052); // expected-error {{argument value -2052 is outside the valid range -2048, 2044}} ++ res |= __builtin_lasx_xvldrepl_w(_1, 2048); // expected-error {{argument value 2048 is outside the valid range -2048, 2044}} ++ res |= __builtin_lasx_xvldrepl_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvldrepl_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvldrepl_d(void *_1, int var) { ++ v4i64 res = __builtin_lasx_xvldrepl_d(_1, -2056); // expected-error {{argument value -2056 is outside the valid range -2048, 2040}} ++ res |= __builtin_lasx_xvldrepl_d(_1, 2048); // expected-error {{argument value 2048 is outside the valid range -2048, 2040}} ++ res |= __builtin_lasx_xvldrepl_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvldrepl_d' must be a constant integer}} ++ return res; ++} ++ ++int xvpickve2gr_w(v8i32 _1, int var) { ++ int res = __builtin_lasx_xvpickve2gr_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 7}} ++ res |= __builtin_lasx_xvpickve2gr_w(_1, 8); // expected-error {{argument value 8 is outside the valid range 0, 7}} ++ res |= __builtin_lasx_xvpickve2gr_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve2gr_w' must be a constant integer}} ++ return res; ++} ++ ++unsigned int xvpickve2gr_wu(v8i32 _1, int var) { ++ unsigned int res = __builtin_lasx_xvpickve2gr_wu(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 7}} ++ res |= __builtin_lasx_xvpickve2gr_wu(_1, 8); // expected-error {{argument value 8 is outside the valid range 0, 7}} ++ res |= __builtin_lasx_xvpickve2gr_wu(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve2gr_wu' must be a constant integer}} ++ return res; ++} ++ ++long xvpickve2gr_d(v4i64 _1, int var) { ++ long res = __builtin_lasx_xvpickve2gr_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 3}} ++ res |= __builtin_lasx_xvpickve2gr_d(_1, 4); // expected-error {{argument value 4 is outside the valid range 0, 3}} ++ res |= __builtin_lasx_xvpickve2gr_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve2gr_d' must be a constant integer}} ++ return res; ++} ++ ++unsigned long int xvpickve2gr_du(v4i64 _1, int var) { ++ unsigned long int res = __builtin_lasx_xvpickve2gr_du(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 3}} ++ res |= __builtin_lasx_xvpickve2gr_du(_1, 4); // expected-error {{argument value 4 is outside the valid range 0, 3}} ++ res |= __builtin_lasx_xvpickve2gr_du(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve2gr_du' must be a constant integer}} ++ return 
res; ++} ++ ++v32i8 xvrotri_b(v32i8 _1, int var) { ++ v32i8 res = __builtin_lasx_xvrotri_b(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 7}} ++ res |= __builtin_lasx_xvrotri_b(_1, 8); // expected-error {{argument value 8 is outside the valid range 0, 7}} ++ res |= __builtin_lasx_xvrotri_b(_1, var); // expected-error {{argument to '__builtin_lasx_xvrotri_b' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvrotri_h(v16i16 _1, int var) { ++ v16i16 res = __builtin_lasx_xvrotri_h(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __builtin_lasx_xvrotri_h(_1, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __builtin_lasx_xvrotri_h(_1, var); // expected-error {{argument to '__builtin_lasx_xvrotri_h' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvrotri_w(v8i32 _1, int var) { ++ v8i32 res = __builtin_lasx_xvrotri_w(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvrotri_w(_1, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvrotri_w(_1, var); // expected-error {{argument to '__builtin_lasx_xvrotri_w' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvrotri_d(v4i64 _1, int var) { ++ v4i64 res = __builtin_lasx_xvrotri_d(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= __builtin_lasx_xvrotri_d(_1, 64); // expected-error {{argument value 64 is outside the valid range 0, 63}} ++ res |= __builtin_lasx_xvrotri_d(_1, var); // expected-error {{argument to '__builtin_lasx_xvrotri_d' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvsrlni_b_h(v32i8 _1, v32i8 _2, int var) { ++ v32i8 res = __builtin_lasx_xvsrlni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __builtin_lasx_xvsrlni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __builtin_lasx_xvsrlni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlni_b_h' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvsrlni_h_w(v16i16 _1, v16i16 _2, int var) { ++ v16i16 res = __builtin_lasx_xvsrlni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvsrlni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvsrlni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlni_h_w' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvsrlni_w_d(v8i32 _1, v8i32 _2, int var) { ++ v8i32 res = __builtin_lasx_xvsrlni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= __builtin_lasx_xvsrlni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range 0, 63}} ++ res |= __builtin_lasx_xvsrlni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlni_w_d' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvsrlni_d_q(v4i64 _1, v4i64 _2, int var) { ++ v4i64 res = __builtin_lasx_xvsrlni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 127}} ++ res |= __builtin_lasx_xvsrlni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range 0, 127}} ++ res |= __builtin_lasx_xvsrlni_d_q(_1, 
_2, var); // expected-error {{argument to '__builtin_lasx_xvsrlni_d_q' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvsrlrni_b_h(v32i8 _1, v32i8 _2, int var) { ++ v32i8 res = __builtin_lasx_xvsrlrni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __builtin_lasx_xvsrlrni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __builtin_lasx_xvsrlrni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlrni_b_h' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvsrlrni_h_w(v16i16 _1, v16i16 _2, int var) { ++ v16i16 res = __builtin_lasx_xvsrlrni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvsrlrni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvsrlrni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlrni_h_w' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvsrlrni_w_d(v8i32 _1, v8i32 _2, int var) { ++ v8i32 res = __builtin_lasx_xvsrlrni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= __builtin_lasx_xvsrlrni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range 0, 63}} ++ res |= __builtin_lasx_xvsrlrni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlrni_w_d' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvsrlrni_d_q(v4i64 _1, v4i64 _2, int var) { ++ v4i64 res = __builtin_lasx_xvsrlrni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 127}} ++ res |= __builtin_lasx_xvsrlrni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range 0, 127}} ++ res |= __builtin_lasx_xvsrlrni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrlrni_d_q' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvssrlni_b_h(v32i8 _1, v32i8 _2, int var) { ++ v32i8 res = __builtin_lasx_xvssrlni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __builtin_lasx_xvssrlni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __builtin_lasx_xvssrlni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_b_h' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvssrlni_h_w(v16i16 _1, v16i16 _2, int var) { ++ v16i16 res = __builtin_lasx_xvssrlni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvssrlni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvssrlni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_h_w' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvssrlni_w_d(v8i32 _1, v8i32 _2, int var) { ++ v8i32 res = __builtin_lasx_xvssrlni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= __builtin_lasx_xvssrlni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range 0, 63}} ++ res |= __builtin_lasx_xvssrlni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_w_d' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvssrlni_d_q(v4i64 _1, v4i64 _2, int var) { ++ v4i64 res = 
__builtin_lasx_xvssrlni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 127}} ++ res |= __builtin_lasx_xvssrlni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range 0, 127}} ++ res |= __builtin_lasx_xvssrlni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_d_q' must be a constant integer}} ++ return res; ++} ++ ++v32u8 xvssrlni_bu_h(v32u8 _1, v32i8 _2, int var) { ++ v32u8 res = __builtin_lasx_xvssrlni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __builtin_lasx_xvssrlni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __builtin_lasx_xvssrlni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_bu_h' must be a constant integer}} ++ return res; ++} ++ ++v16u16 xvssrlni_hu_w(v16u16 _1, v16i16 _2, int var) { ++ v16u16 res = __builtin_lasx_xvssrlni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvssrlni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvssrlni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_hu_w' must be a constant integer}} ++ return res; ++} ++ ++v8u32 xvssrlni_wu_d(v8u32 _1, v8i32 _2, int var) { ++ v8u32 res = __builtin_lasx_xvssrlni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= __builtin_lasx_xvssrlni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range 0, 63}} ++ res |= __builtin_lasx_xvssrlni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_wu_d' must be a constant integer}} ++ return res; ++} ++ ++v4u64 xvssrlni_du_q(v4u64 _1, v4i64 _2, int var) { ++ v4u64 res = __builtin_lasx_xvssrlni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 127}} ++ res |= __builtin_lasx_xvssrlni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range 0, 127}} ++ res |= __builtin_lasx_xvssrlni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlni_du_q' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvssrlrni_b_h(v32i8 _1, v32i8 _2, int var) { ++ v32i8 res = __builtin_lasx_xvssrlrni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __builtin_lasx_xvssrlrni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __builtin_lasx_xvssrlrni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_b_h' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvssrlrni_h_w(v16i16 _1, v16i16 _2, int var) { ++ v16i16 res = __builtin_lasx_xvssrlrni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvssrlrni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvssrlrni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_h_w' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvssrlrni_w_d(v8i32 _1, v8i32 _2, int var) { ++ v8i32 res = __builtin_lasx_xvssrlrni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= 
__builtin_lasx_xvssrlrni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range 0, 63}} ++ res |= __builtin_lasx_xvssrlrni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_w_d' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvssrlrni_d_q(v4i64 _1, v4i64 _2, int var) { ++ v4i64 res = __builtin_lasx_xvssrlrni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 127}} ++ res |= __builtin_lasx_xvssrlrni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range 0, 127}} ++ res |= __builtin_lasx_xvssrlrni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_d_q' must be a constant integer}} ++ return res; ++} ++ ++v32u8 xvssrlrni_bu_h(v32u8 _1, v32i8 _2, int var) { ++ v32u8 res = __builtin_lasx_xvssrlrni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __builtin_lasx_xvssrlrni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __builtin_lasx_xvssrlrni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_bu_h' must be a constant integer}} ++ return res; ++} ++ ++v16u16 xvssrlrni_hu_w(v16u16 _1, v16i16 _2, int var) { ++ v16u16 res = __builtin_lasx_xvssrlrni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvssrlrni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvssrlrni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_hu_w' must be a constant integer}} ++ return res; ++} ++ ++v8u32 xvssrlrni_wu_d(v8u32 _1, v8i32 _2, int var) { ++ v8u32 res = __builtin_lasx_xvssrlrni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= __builtin_lasx_xvssrlrni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range 0, 63}} ++ res |= __builtin_lasx_xvssrlrni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_wu_d' must be a constant integer}} ++ return res; ++} ++ ++v4u64 xvssrlrni_du_q(v4u64 _1, v4i64 _2, int var) { ++ v4u64 res = __builtin_lasx_xvssrlrni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 127}} ++ res |= __builtin_lasx_xvssrlrni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range 0, 127}} ++ res |= __builtin_lasx_xvssrlrni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrlrni_du_q' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvsrani_b_h(v32i8 _1, v32i8 _2, int var) { ++ v32i8 res = __builtin_lasx_xvsrani_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __builtin_lasx_xvsrani_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __builtin_lasx_xvsrani_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrani_b_h' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvsrani_h_w(v16i16 _1, v16i16 _2, int var) { ++ v16i16 res = __builtin_lasx_xvsrani_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvsrani_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= 
__builtin_lasx_xvsrani_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrani_h_w' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvsrani_w_d(v8i32 _1, v8i32 _2, int var) { ++ v8i32 res = __builtin_lasx_xvsrani_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= __builtin_lasx_xvsrani_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range 0, 63}} ++ res |= __builtin_lasx_xvsrani_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrani_w_d' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvsrani_d_q(v4i64 _1, v4i64 _2, int var) { ++ v4i64 res = __builtin_lasx_xvsrani_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 127}} ++ res |= __builtin_lasx_xvsrani_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range 0, 127}} ++ res |= __builtin_lasx_xvsrani_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrani_d_q' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvsrarni_b_h(v32i8 _1, v32i8 _2, int var) { ++ v32i8 res = __builtin_lasx_xvsrarni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __builtin_lasx_xvsrarni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __builtin_lasx_xvsrarni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrarni_b_h' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvsrarni_h_w(v16i16 _1, v16i16 _2, int var) { ++ v16i16 res = __builtin_lasx_xvsrarni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvsrarni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvsrarni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrarni_h_w' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvsrarni_w_d(v8i32 _1, v8i32 _2, int var) { ++ v8i32 res = __builtin_lasx_xvsrarni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= __builtin_lasx_xvsrarni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range 0, 63}} ++ res |= __builtin_lasx_xvsrarni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrarni_w_d' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvsrarni_d_q(v4i64 _1, v4i64 _2, int var) { ++ v4i64 res = __builtin_lasx_xvsrarni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 127}} ++ res |= __builtin_lasx_xvsrarni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range 0, 127}} ++ res |= __builtin_lasx_xvsrarni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvsrarni_d_q' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvssrani_b_h(v32i8 _1, v32i8 _2, int var) { ++ v32i8 res = __builtin_lasx_xvssrani_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __builtin_lasx_xvssrani_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __builtin_lasx_xvssrani_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_b_h' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvssrani_h_w(v16i16 _1, v16i16 _2, int var) 
{ ++ v16i16 res = __builtin_lasx_xvssrani_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvssrani_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvssrani_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_h_w' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvssrani_w_d(v8i32 _1, v8i32 _2, int var) { ++ v8i32 res = __builtin_lasx_xvssrani_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= __builtin_lasx_xvssrani_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range 0, 63}} ++ res |= __builtin_lasx_xvssrani_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_w_d' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvssrani_d_q(v4i64 _1, v4i64 _2, int var) { ++ v4i64 res = __builtin_lasx_xvssrani_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 127}} ++ res |= __builtin_lasx_xvssrani_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range 0, 127}} ++ res |= __builtin_lasx_xvssrani_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_d_q' must be a constant integer}} ++ return res; ++} ++ ++v32u8 xvssrani_bu_h(v32u8 _1, v32i8 _2, int var) { ++ v32u8 res = __builtin_lasx_xvssrani_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __builtin_lasx_xvssrani_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __builtin_lasx_xvssrani_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_bu_h' must be a constant integer}} ++ return res; ++} ++ ++v16u16 xvssrani_hu_w(v16u16 _1, v16i16 _2, int var) { ++ v16u16 res = __builtin_lasx_xvssrani_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvssrani_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvssrani_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_hu_w' must be a constant integer}} ++ return res; ++} ++ ++v8u32 xvssrani_wu_d(v8u32 _1, v8i32 _2, int var) { ++ v8u32 res = __builtin_lasx_xvssrani_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= __builtin_lasx_xvssrani_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range 0, 63}} ++ res |= __builtin_lasx_xvssrani_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_wu_d' must be a constant integer}} ++ return res; ++} ++ ++v4u64 xvssrani_du_q(v4u64 _1, v4i64 _2, int var) { ++ v4u64 res = __builtin_lasx_xvssrani_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 127}} ++ res |= __builtin_lasx_xvssrani_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range 0, 127}} ++ res |= __builtin_lasx_xvssrani_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrani_du_q' must be a constant integer}} ++ return res; ++} ++ ++v32i8 xvssrarni_b_h(v32i8 _1, v32i8 _2, int var) { ++ v32i8 res = __builtin_lasx_xvssrarni_b_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= 
__builtin_lasx_xvssrarni_b_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __builtin_lasx_xvssrarni_b_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_b_h' must be a constant integer}} ++ return res; ++} ++ ++v16i16 xvssrarni_h_w(v16i16 _1, v16i16 _2, int var) { ++ v16i16 res = __builtin_lasx_xvssrarni_h_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvssrarni_h_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvssrarni_h_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_h_w' must be a constant integer}} ++ return res; ++} ++ ++v8i32 xvssrarni_w_d(v8i32 _1, v8i32 _2, int var) { ++ v8i32 res = __builtin_lasx_xvssrarni_w_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= __builtin_lasx_xvssrarni_w_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range 0, 63}} ++ res |= __builtin_lasx_xvssrarni_w_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_w_d' must be a constant integer}} ++ return res; ++} ++ ++v4i64 xvssrarni_d_q(v4i64 _1, v4i64 _2, int var) { ++ v4i64 res = __builtin_lasx_xvssrarni_d_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 127}} ++ res |= __builtin_lasx_xvssrarni_d_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range 0, 127}} ++ res |= __builtin_lasx_xvssrarni_d_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_d_q' must be a constant integer}} ++ return res; ++} ++ ++v32u8 xvssrarni_bu_h(v32u8 _1, v32i8 _2, int var) { ++ v32u8 res = __builtin_lasx_xvssrarni_bu_h(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 15}} ++ res |= __builtin_lasx_xvssrarni_bu_h(_1, _2, 16); // expected-error {{argument value 16 is outside the valid range 0, 15}} ++ res |= __builtin_lasx_xvssrarni_bu_h(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_bu_h' must be a constant integer}} ++ return res; ++} ++ ++v16u16 xvssrarni_hu_w(v16u16 _1, v16i16 _2, int var) { ++ v16u16 res = __builtin_lasx_xvssrarni_hu_w(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvssrarni_hu_w(_1, _2, 32); // expected-error {{argument value 32 is outside the valid range 0, 31}} ++ res |= __builtin_lasx_xvssrarni_hu_w(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_hu_w' must be a constant integer}} ++ return res; ++} ++ ++v8u32 xvssrarni_wu_d(v8u32 _1, v8i32 _2, int var) { ++ v8u32 res = __builtin_lasx_xvssrarni_wu_d(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 63}} ++ res |= __builtin_lasx_xvssrarni_wu_d(_1, _2, 64); // expected-error {{argument value 64 is outside the valid range 0, 63}} ++ res |= __builtin_lasx_xvssrarni_wu_d(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_wu_d' must be a constant integer}} ++ return res; ++} ++ ++v4u64 xvssrarni_du_q(v4u64 _1, v4i64 _2, int var) { ++ v4u64 res = __builtin_lasx_xvssrarni_du_q(_1, _2, -1); // expected-error {{argument value 4294967295 is outside the valid range 0, 127}} ++ res |= __builtin_lasx_xvssrarni_du_q(_1, _2, 128); // expected-error {{argument value 128 is outside the valid range 0, 127}} ++ res |= 
__builtin_lasx_xvssrarni_du_q(_1, _2, var); // expected-error {{argument to '__builtin_lasx_xvssrarni_du_q' must be a constant integer}}
++ return res;
++}
++
++v4f64 xvpickve_d_f(v4f64 _1, int var) {
++ v4f64 res = __builtin_lasx_xvpickve_d_f(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 3]}}
++ res += __builtin_lasx_xvpickve_d_f(_1, 4); // expected-error {{argument value 4 is outside the valid range [0, 3]}}
++ res += __builtin_lasx_xvpickve_d_f(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve_d_f' must be a constant integer}}
++ return res;
++}
++
++v8f32 xvpickve_w_f(v8f32 _1, int var) {
++ v8f32 res = __builtin_lasx_xvpickve_w_f(_1, -1); // expected-error {{argument value 4294967295 is outside the valid range [0, 7]}}
++ res += __builtin_lasx_xvpickve_w_f(_1, 8); // expected-error {{argument value 8 is outside the valid range [0, 7]}}
++ res += __builtin_lasx_xvpickve_w_f(_1, var); // expected-error {{argument to '__builtin_lasx_xvpickve_w_f' must be a constant integer}}
++ return res;
++}
++
++v32i8 xvrepli_b(int var) {
++ v32i8 res = __builtin_lasx_xvrepli_b(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}}
++ res |= __builtin_lasx_xvrepli_b(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}}
++ res |= __builtin_lasx_xvrepli_b(var); // expected-error {{argument to '__builtin_lasx_xvrepli_b' must be a constant integer}}
++ return res;
++}
++
++v4i64 xvrepli_d(int var) {
++ v4i64 res = __builtin_lasx_xvrepli_d(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}}
++ res |= __builtin_lasx_xvrepli_d(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}}
++ res |= __builtin_lasx_xvrepli_d(var); // expected-error {{argument to '__builtin_lasx_xvrepli_d' must be a constant integer}}
++ return res;
++}
++
++v16i16 xvrepli_h(int var) {
++ v16i16 res = __builtin_lasx_xvrepli_h(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}}
++ res |= __builtin_lasx_xvrepli_h(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}}
++ res |= __builtin_lasx_xvrepli_h(var); // expected-error {{argument to '__builtin_lasx_xvrepli_h' must be a constant integer}}
++ return res;
++}
++
++v8i32 xvrepli_w(int var) {
++ v8i32 res = __builtin_lasx_xvrepli_w(-513); // expected-error {{argument value -513 is outside the valid range [-512, 511]}}
++ res |= __builtin_lasx_xvrepli_w(512); // expected-error {{argument value 512 is outside the valid range [-512, 511]}}
++ res |= __builtin_lasx_xvrepli_w(var); // expected-error {{argument to '__builtin_lasx_xvrepli_w' must be a constant integer}}
++ return res;
++}
+diff --git a/clang/test/CodeGen/LoongArch/lasx/builtin.c b/clang/test/CodeGen/LoongArch/lasx/builtin.c
+new file mode 100644
+index 000000000000..0185f2004d52
+--- /dev/null
++++ b/clang/test/CodeGen/LoongArch/lasx/builtin.c
+@@ -0,0 +1,4452 @@
++// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
++// RUN: %clang_cc1 -triple loongarch64 -target-feature +lasx -O2 -emit-llvm %s -o - | FileCheck %s
++
++typedef signed char v32i8 __attribute__((vector_size(32), aligned(32)));
++typedef signed char v32i8_b __attribute__((vector_size(32), aligned(1)));
++typedef unsigned char v32u8 __attribute__((vector_size(32), aligned(32)));
++typedef unsigned char v32u8_b __attribute__((vector_size(32), aligned(1)));
++typedef short v16i16 __attribute__((vector_size(32), aligned(32)));
++typedef short v16i16_h __attribute__((vector_size(32), aligned(2)));
++typedef unsigned short v16u16 __attribute__((vector_size(32), aligned(32)));
++typedef unsigned short v16u16_h __attribute__((vector_size(32), aligned(2)));
++typedef int v8i32 __attribute__((vector_size(32), aligned(32)));
++typedef int v8i32_w __attribute__((vector_size(32), aligned(4)));
++typedef unsigned int v8u32 __attribute__((vector_size(32), aligned(32)));
++typedef unsigned int v8u32_w __attribute__((vector_size(32), aligned(4)));
++typedef long long v4i64 __attribute__((vector_size(32), aligned(32)));
++typedef long long v4i64_d __attribute__((vector_size(32), aligned(8)));
++typedef unsigned long long v4u64 __attribute__((vector_size(32), aligned(32)));
++typedef unsigned long long v4u64_d __attribute__((vector_size(32), aligned(8)));
++typedef float v8f32 __attribute__((vector_size(32), aligned(32)));
++typedef float v8f32_w __attribute__((vector_size(32), aligned(4)));
++typedef double v4f64 __attribute__((vector_size(32), aligned(32)));
++typedef double v4f64_d __attribute__((vector_size(32), aligned(8)));
++
++typedef double v4f64 __attribute__((vector_size(32), aligned(32)));
++typedef double v4f64_d __attribute__((vector_size(32), aligned(8)));
++
++// CHECK-LABEL: @xvsll_b(
++// CHECK-NEXT: entry:
++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsll.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
++// CHECK-NEXT: ret <32 x i8> [[TMP0]]
++//
++v32i8 xvsll_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsll_b(_1, _2); }
++// CHECK-LABEL: @xvsll_h(
++// CHECK-NEXT: entry:
++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsll.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
++// CHECK-NEXT: ret <16 x i16> [[TMP0]]
++//
++v16i16 xvsll_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsll_h(_1, _2); }
++// CHECK-LABEL: @xvsll_w(
++// CHECK-NEXT: entry:
++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsll.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
++// CHECK-NEXT: ret <8 x i32> [[TMP0]]
++//
++v8i32 xvsll_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsll_w(_1, _2); }
++// CHECK-LABEL: @xvsll_d(
++// CHECK-NEXT: entry:
++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsll.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
++// CHECK-NEXT: ret <4 x i64> [[TMP0]]
++//
++v4i64 xvsll_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsll_d(_1, _2); }
++// CHECK-LABEL: @xvslli_b(
++// CHECK-NEXT: entry:
++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvslli.b(<32 x i8> [[_1:%.*]], i32 1)
++// CHECK-NEXT: ret <32 x i8> [[TMP0]]
++//
++v32i8 xvslli_b(v32i8 _1) { return __builtin_lasx_xvslli_b(_1, 1); }
++// CHECK-LABEL: @xvslli_h(
++// CHECK-NEXT: entry:
++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvslli.h(<16 x i16> [[_1:%.*]], i32 1)
++// CHECK-NEXT: ret <16 x i16> [[TMP0]]
++//
++v16i16 xvslli_h(v16i16 _1) { return __builtin_lasx_xvslli_h(_1, 1); }
++// CHECK-LABEL: @xvslli_w(
++// CHECK-NEXT: entry:
++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvslli.w(<8 x i32> [[_1:%.*]], i32 1)
++// CHECK-NEXT: ret <8 x i32> [[TMP0]]
++//
++v8i32 xvslli_w(v8i32 _1) { return __builtin_lasx_xvslli_w(_1, 1); }
++// CHECK-LABEL: @xvslli_d(
++// CHECK-NEXT: entry:
++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvslli.d(<4 x i64> [[_1:%.*]], i32 1)
++// CHECK-NEXT: ret <4 x i64> [[TMP0]]
++//
++v4i64 xvslli_d(v4i64 _1) { return __builtin_lasx_xvslli_d(_1, 1); }
++// CHECK-LABEL: @xvsra_b(
++// CHECK-NEXT: entry:
++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsra.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
++// CHECK-NEXT: ret <32 x i8> [[TMP0]]
++//
++v32i8 xvsra_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsra_b(_1, _2); }
++// CHECK-LABEL: @xvsra_h(
++// CHECK-NEXT: entry:
++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsra.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
++// CHECK-NEXT: ret <16 x i16> [[TMP0]]
++//
++v16i16 xvsra_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsra_h(_1, _2); }
++// CHECK-LABEL: @xvsra_w(
++// CHECK-NEXT: entry:
++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsra.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
++// CHECK-NEXT: ret <8 x i32> [[TMP0]]
++//
++v8i32 xvsra_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsra_w(_1, _2); }
++// CHECK-LABEL: @xvsra_d(
++// CHECK-NEXT: entry:
++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsra.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
++// CHECK-NEXT: ret <4 x i64> [[TMP0]]
++//
++v4i64 xvsra_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsra_d(_1, _2); }
++// CHECK-LABEL: @xvsrai_b(
++// CHECK-NEXT: entry:
++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrai.b(<32 x i8> [[_1:%.*]], i32 1)
++// CHECK-NEXT: ret <32 x i8> [[TMP0]]
++//
++v32i8 xvsrai_b(v32i8 _1) { return __builtin_lasx_xvsrai_b(_1, 1); }
++// CHECK-LABEL: @xvsrai_h(
++// CHECK-NEXT: entry:
++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrai.h(<16 x i16> [[_1:%.*]], i32 1)
++// CHECK-NEXT: ret <16 x i16> [[TMP0]]
++//
++v16i16 xvsrai_h(v16i16 _1) { return __builtin_lasx_xvsrai_h(_1, 1); }
++// CHECK-LABEL: @xvsrai_w(
++// CHECK-NEXT: entry:
++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrai.w(<8 x i32> [[_1:%.*]], i32 1)
++// CHECK-NEXT: ret <8 x i32> [[TMP0]]
++//
++v8i32 xvsrai_w(v8i32 _1) { return __builtin_lasx_xvsrai_w(_1, 1); }
++// CHECK-LABEL: @xvsrai_d(
++// CHECK-NEXT: entry:
++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrai.d(<4 x i64> [[_1:%.*]], i32 1)
++// CHECK-NEXT: ret <4 x i64> [[TMP0]]
++//
++v4i64 xvsrai_d(v4i64 _1) { return __builtin_lasx_xvsrai_d(_1, 1); }
++// CHECK-LABEL: @xvsrar_b(
++// CHECK-NEXT: entry:
++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrar.b(<32 x i8> [[_1:%.*]], <32 x i8> [[_2:%.*]])
++// CHECK-NEXT: ret <32 x i8> [[TMP0]]
++//
++v32i8 xvsrar_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrar_b(_1, _2); }
++// CHECK-LABEL: @xvsrar_h(
++// CHECK-NEXT: entry:
++// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i16> @llvm.loongarch.lasx.xvsrar.h(<16 x i16> [[_1:%.*]], <16 x i16> [[_2:%.*]])
++// CHECK-NEXT: ret <16 x i16> [[TMP0]]
++//
++v16i16 xvsrar_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrar_h(_1, _2); }
++// CHECK-LABEL: @xvsrar_w(
++// CHECK-NEXT: entry:
++// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i32> @llvm.loongarch.lasx.xvsrar.w(<8 x i32> [[_1:%.*]], <8 x i32> [[_2:%.*]])
++// CHECK-NEXT: ret <8 x i32> [[TMP0]]
++//
++v8i32 xvsrar_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrar_w(_1, _2); }
++// CHECK-LABEL: @xvsrar_d(
++// CHECK-NEXT: entry:
++// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i64> @llvm.loongarch.lasx.xvsrar.d(<4 x i64> [[_1:%.*]], <4 x i64> [[_2:%.*]])
++// CHECK-NEXT: ret <4 x i64> [[TMP0]]
++//
++v4i64 xvsrar_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrar_d(_1, _2); }
++// CHECK-LABEL: @xvsrari_b(
++// CHECK-NEXT: entry:
++// CHECK-NEXT: [[TMP0:%.*]] = tail call <32 x i8> @llvm.loongarch.lasx.xvsrari.b(<32 x i8> [[_1:%.*]], i32 1)
++// CHECK-NEXT: ret <32 x i8> [[TMP0]]
++//
++v32i8 xvsrari_b(v32i8 _1) {
return __builtin_lasx_xvsrari_b(_1, 1); } ++// CHECK-LABEL: @xvsrari_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsrari.h(<16 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvsrari_h(v16i16 _1) { return __builtin_lasx_xvsrari_h(_1, 1); } ++// CHECK-LABEL: @xvsrari_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsrari.w(<8 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvsrari_w(v8i32 _1) { return __builtin_lasx_xvsrari_w(_1, 1); } ++// CHECK-LABEL: @xvsrari_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsrari.d(<4 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvsrari_d(v4i64 _1) { return __builtin_lasx_xvsrari_d(_1, 1); } ++// CHECK-LABEL: @xvsrl_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsrl.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvsrl_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrl_b(_1, _2); } ++// CHECK-LABEL: @xvsrl_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsrl.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvsrl_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrl_h(_1, _2); } ++// CHECK-LABEL: @xvsrl_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsrl.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvsrl_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrl_w(_1, _2); } ++// CHECK-LABEL: @xvsrl_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsrl.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvsrl_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrl_d(_1, _2); } ++// CHECK-LABEL: @xvsrli_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsrli.b(<32 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvsrli_b(v32i8 _1) { return __builtin_lasx_xvsrli_b(_1, 1); } ++// CHECK-LABEL: @xvsrli_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsrli.h(<16 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvsrli_h(v16i16 _1) { return __builtin_lasx_xvsrli_h(_1, 1); } ++// CHECK-LABEL: @xvsrli_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsrli.w(<8 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvsrli_w(v8i32 _1) { return __builtin_lasx_xvsrli_w(_1, 1); } ++// CHECK-LABEL: @xvsrli_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsrli.d(<4 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvsrli_d(v4i64 _1) { return __builtin_lasx_xvsrli_d(_1, 1); } ++// CHECK-LABEL: @xvsrlr_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlr.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvsrlr_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrlr_b(_1, _2); } ++// CHECK-LABEL: @xvsrlr_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlr.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// 
CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvsrlr_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrlr_h(_1, _2); } ++// CHECK-LABEL: @xvsrlr_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlr.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvsrlr_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrlr_w(_1, _2); } ++// CHECK-LABEL: @xvsrlr_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlr.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvsrlr_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrlr_d(_1, _2); } ++// CHECK-LABEL: @xvsrlri_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlri.b(<32 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvsrlri_b(v32i8 _1) { return __builtin_lasx_xvsrlri_b(_1, 1); } ++// CHECK-LABEL: @xvsrlri_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlri.h(<16 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvsrlri_h(v16i16 _1) { return __builtin_lasx_xvsrlri_h(_1, 1); } ++// CHECK-LABEL: @xvsrlri_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlri.w(<8 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvsrlri_w(v8i32 _1) { return __builtin_lasx_xvsrlri_w(_1, 1); } ++// CHECK-LABEL: @xvsrlri_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlri.d(<4 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvsrlri_d(v4i64 _1) { return __builtin_lasx_xvsrlri_d(_1, 1); } ++// CHECK-LABEL: @xvbitclr_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvbitclr.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32u8 xvbitclr_b(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvbitclr_b(_1, _2); } ++// CHECK-LABEL: @xvbitclr_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvbitclr.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16u16 xvbitclr_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvbitclr_h(_1, _2); } ++// CHECK-LABEL: @xvbitclr_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvbitclr.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8u32 xvbitclr_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvbitclr_w(_1, _2); } ++// CHECK-LABEL: @xvbitclr_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvbitclr.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4u64 xvbitclr_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvbitclr_d(_1, _2); } ++// CHECK-LABEL: @xvbitclri_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvbitclri.b(<32 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32u8 xvbitclri_b(v32u8 _1) { return __builtin_lasx_xvbitclri_b(_1, 1); } ++// CHECK-LABEL: @xvbitclri_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvbitclri.h(<16 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16u16 xvbitclri_h(v16u16 _1) { return __builtin_lasx_xvbitclri_h(_1, 1); } ++// 
CHECK-LABEL: @xvbitclri_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvbitclri.w(<8 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8u32 xvbitclri_w(v8u32 _1) { return __builtin_lasx_xvbitclri_w(_1, 1); } ++// CHECK-LABEL: @xvbitclri_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvbitclri.d(<4 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4u64 xvbitclri_d(v4u64 _1) { return __builtin_lasx_xvbitclri_d(_1, 1); } ++// CHECK-LABEL: @xvbitset_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvbitset.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32u8 xvbitset_b(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvbitset_b(_1, _2); } ++// CHECK-LABEL: @xvbitset_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvbitset.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16u16 xvbitset_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvbitset_h(_1, _2); } ++// CHECK-LABEL: @xvbitset_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvbitset.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8u32 xvbitset_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvbitset_w(_1, _2); } ++// CHECK-LABEL: @xvbitset_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvbitset.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4u64 xvbitset_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvbitset_d(_1, _2); } ++// CHECK-LABEL: @xvbitseti_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvbitseti.b(<32 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32u8 xvbitseti_b(v32u8 _1) { return __builtin_lasx_xvbitseti_b(_1, 1); } ++// CHECK-LABEL: @xvbitseti_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvbitseti.h(<16 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16u16 xvbitseti_h(v16u16 _1) { return __builtin_lasx_xvbitseti_h(_1, 1); } ++// CHECK-LABEL: @xvbitseti_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvbitseti.w(<8 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8u32 xvbitseti_w(v8u32 _1) { return __builtin_lasx_xvbitseti_w(_1, 1); } ++// CHECK-LABEL: @xvbitseti_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvbitseti.d(<4 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4u64 xvbitseti_d(v4u64 _1) { return __builtin_lasx_xvbitseti_d(_1, 1); } ++// CHECK-LABEL: @xvbitrev_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvbitrev.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32u8 xvbitrev_b(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvbitrev_b(_1, _2); } ++// CHECK-LABEL: @xvbitrev_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvbitrev.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16u16 xvbitrev_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvbitrev_h(_1, _2); } ++// CHECK-LABEL: @xvbitrev_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = 
tail call <8 x i32> @llvm.loongarch.lasx.xvbitrev.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8u32 xvbitrev_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvbitrev_w(_1, _2); } ++// CHECK-LABEL: @xvbitrev_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvbitrev.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4u64 xvbitrev_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvbitrev_d(_1, _2); } ++// CHECK-LABEL: @xvbitrevi_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b(<32 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32u8 xvbitrevi_b(v32u8 _1) { return __builtin_lasx_xvbitrevi_b(_1, 1); } ++// CHECK-LABEL: @xvbitrevi_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h(<16 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16u16 xvbitrevi_h(v16u16 _1) { return __builtin_lasx_xvbitrevi_h(_1, 1); } ++// CHECK-LABEL: @xvbitrevi_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w(<8 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8u32 xvbitrevi_w(v8u32 _1) { return __builtin_lasx_xvbitrevi_w(_1, 1); } ++// CHECK-LABEL: @xvbitrevi_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d(<4 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4u64 xvbitrevi_d(v4u64 _1) { return __builtin_lasx_xvbitrevi_d(_1, 1); } ++// CHECK-LABEL: @xvadd_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvadd.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvadd_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvadd_b(_1, _2); } ++// CHECK-LABEL: @xvadd_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvadd.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvadd_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvadd_h(_1, _2); } ++// CHECK-LABEL: @xvadd_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvadd.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvadd_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvadd_w(_1, _2); } ++// CHECK-LABEL: @xvadd_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvadd.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvadd_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvadd_d(_1, _2); } ++// CHECK-LABEL: @xvaddi_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvaddi.bu(<32 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvaddi_bu(v32i8 _1) { return __builtin_lasx_xvaddi_bu(_1, 1); } ++// CHECK-LABEL: @xvaddi_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvaddi.hu(<16 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvaddi_hu(v16i16 _1) { return __builtin_lasx_xvaddi_hu(_1, 1); } ++// CHECK-LABEL: @xvaddi_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvaddi.wu(<8 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvaddi_wu(v8i32 _1) { 
return __builtin_lasx_xvaddi_wu(_1, 1); } ++// CHECK-LABEL: @xvaddi_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvaddi.du(<4 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvaddi_du(v4i64 _1) { return __builtin_lasx_xvaddi_du(_1, 1); } ++// CHECK-LABEL: @xvsub_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsub.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvsub_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsub_b(_1, _2); } ++// CHECK-LABEL: @xvsub_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsub.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvsub_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsub_h(_1, _2); } ++// CHECK-LABEL: @xvsub_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsub.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvsub_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsub_w(_1, _2); } ++// CHECK-LABEL: @xvsub_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsub.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvsub_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsub_d(_1, _2); } ++// CHECK-LABEL: @xvsubi_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsubi.bu(<32 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvsubi_bu(v32i8 _1) { return __builtin_lasx_xvsubi_bu(_1, 1); } ++// CHECK-LABEL: @xvsubi_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsubi.hu(<16 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvsubi_hu(v16i16 _1) { return __builtin_lasx_xvsubi_hu(_1, 1); } ++// CHECK-LABEL: @xvsubi_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsubi.wu(<8 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvsubi_wu(v8i32 _1) { return __builtin_lasx_xvsubi_wu(_1, 1); } ++// CHECK-LABEL: @xvsubi_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsubi.du(<4 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvsubi_du(v4i64 _1) { return __builtin_lasx_xvsubi_du(_1, 1); } ++// CHECK-LABEL: @xvmax_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmax.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvmax_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmax_b(_1, _2); } ++// CHECK-LABEL: @xvmax_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmax.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvmax_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmax_h(_1, _2); } ++// CHECK-LABEL: @xvmax_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmax.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvmax_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmax_w(_1, _2); } ++// CHECK-LABEL: @xvmax_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmax.d(<4 x i64> 
_1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvmax_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmax_d(_1, _2); } ++// CHECK-LABEL: @xvmaxi_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmaxi.b(<32 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvmaxi_b(v32i8 _1) { return __builtin_lasx_xvmaxi_b(_1, 1); } ++// CHECK-LABEL: @xvmaxi_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmaxi.h(<16 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvmaxi_h(v16i16 _1) { return __builtin_lasx_xvmaxi_h(_1, 1); } ++// CHECK-LABEL: @xvmaxi_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmaxi.w(<8 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvmaxi_w(v8i32 _1) { return __builtin_lasx_xvmaxi_w(_1, 1); } ++// CHECK-LABEL: @xvmaxi_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmaxi.d(<4 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvmaxi_d(v4i64 _1) { return __builtin_lasx_xvmaxi_d(_1, 1); } ++// CHECK-LABEL: @xvmax_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmax.bu(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32u8 xvmax_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmax_bu(_1, _2); } ++// CHECK-LABEL: @xvmax_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmax.hu(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16u16 xvmax_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmax_hu(_1, _2); } ++// CHECK-LABEL: @xvmax_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmax.wu(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8u32 xvmax_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmax_wu(_1, _2); } ++// CHECK-LABEL: @xvmax_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmax.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4u64 xvmax_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmax_du(_1, _2); } ++// CHECK-LABEL: @xvmaxi_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu(<32 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32u8 xvmaxi_bu(v32u8 _1) { return __builtin_lasx_xvmaxi_bu(_1, 1); } ++// CHECK-LABEL: @xvmaxi_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu(<16 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16u16 xvmaxi_hu(v16u16 _1) { return __builtin_lasx_xvmaxi_hu(_1, 1); } ++// CHECK-LABEL: @xvmaxi_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu(<8 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8u32 xvmaxi_wu(v8u32 _1) { return __builtin_lasx_xvmaxi_wu(_1, 1); } ++// CHECK-LABEL: @xvmaxi_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmaxi.du(<4 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4u64 xvmaxi_du(v4u64 _1) { return __builtin_lasx_xvmaxi_du(_1, 1); } ++// CHECK-LABEL: @xvmin_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = 
tail call <32 x i8> @llvm.loongarch.lasx.xvmin.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvmin_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmin_b(_1, _2); } ++// CHECK-LABEL: @xvmin_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmin.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvmin_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmin_h(_1, _2); } ++// CHECK-LABEL: @xvmin_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmin.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvmin_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmin_w(_1, _2); } ++// CHECK-LABEL: @xvmin_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmin.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvmin_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmin_d(_1, _2); } ++// CHECK-LABEL: @xvmini_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmini.b(<32 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvmini_b(v32i8 _1) { return __builtin_lasx_xvmini_b(_1, 1); } ++// CHECK-LABEL: @xvmini_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmini.h(<16 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvmini_h(v16i16 _1) { return __builtin_lasx_xvmini_h(_1, 1); } ++// CHECK-LABEL: @xvmini_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmini.w(<8 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvmini_w(v8i32 _1) { return __builtin_lasx_xvmini_w(_1, 1); } ++// CHECK-LABEL: @xvmini_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmini.d(<4 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvmini_d(v4i64 _1) { return __builtin_lasx_xvmini_d(_1, 1); } ++// CHECK-LABEL: @xvmin_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmin.bu(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32u8 xvmin_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmin_bu(_1, _2); } ++// CHECK-LABEL: @xvmin_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmin.hu(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16u16 xvmin_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmin_hu(_1, _2); } ++// CHECK-LABEL: @xvmin_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmin.wu(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8u32 xvmin_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmin_wu(_1, _2); } ++// CHECK-LABEL: @xvmin_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmin.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4u64 xvmin_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmin_du(_1, _2); } ++// CHECK-LABEL: @xvmini_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmini.bu(<32 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32u8 xvmini_bu(v32u8 _1) { return 
__builtin_lasx_xvmini_bu(_1, 1); } ++// CHECK-LABEL: @xvmini_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmini.hu(<16 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16u16 xvmini_hu(v16u16 _1) { return __builtin_lasx_xvmini_hu(_1, 1); } ++// CHECK-LABEL: @xvmini_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmini.wu(<8 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8u32 xvmini_wu(v8u32 _1) { return __builtin_lasx_xvmini_wu(_1, 1); } ++// CHECK-LABEL: @xvmini_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmini.du(<4 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4u64 xvmini_du(v4u64 _1) { return __builtin_lasx_xvmini_du(_1, 1); } ++// CHECK-LABEL: @xvseq_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvseq.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvseq_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvseq_b(_1, _2); } ++// CHECK-LABEL: @xvseq_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvseq.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvseq_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvseq_h(_1, _2); } ++// CHECK-LABEL: @xvseq_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvseq.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvseq_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvseq_w(_1, _2); } ++// CHECK-LABEL: @xvseq_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvseq.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvseq_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvseq_d(_1, _2); } ++// CHECK-LABEL: @xvseqi_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvseqi.b(<32 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvseqi_b(v32i8 _1) { return __builtin_lasx_xvseqi_b(_1, 1); } ++// CHECK-LABEL: @xvseqi_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvseqi.h(<16 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvseqi_h(v16i16 _1) { return __builtin_lasx_xvseqi_h(_1, 1); } ++// CHECK-LABEL: @xvseqi_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvseqi.w(<8 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvseqi_w(v8i32 _1) { return __builtin_lasx_xvseqi_w(_1, 1); } ++// CHECK-LABEL: @xvseqi_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvseqi.d(<4 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvseqi_d(v4i64 _1) { return __builtin_lasx_xvseqi_d(_1, 1); } ++// CHECK-LABEL: @xvslt_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvslt.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvslt_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvslt_b(_1, _2); } ++// CHECK-LABEL: @xvslt_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvslt.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x 
i16> TMP0 ++// ++v16i16 xvslt_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvslt_h(_1, _2); } ++// CHECK-LABEL: @xvslt_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvslt.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvslt_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvslt_w(_1, _2); } ++// CHECK-LABEL: @xvslt_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvslt.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvslt_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvslt_d(_1, _2); } ++// CHECK-LABEL: @xvslti_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvslti.b(<32 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvslti_b(v32i8 _1) { return __builtin_lasx_xvslti_b(_1, 1); } ++// CHECK-LABEL: @xvslti_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvslti.h(<16 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvslti_h(v16i16 _1) { return __builtin_lasx_xvslti_h(_1, 1); } ++// CHECK-LABEL: @xvslti_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvslti.w(<8 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvslti_w(v8i32 _1) { return __builtin_lasx_xvslti_w(_1, 1); } ++// CHECK-LABEL: @xvslti_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvslti.d(<4 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvslti_d(v4i64 _1) { return __builtin_lasx_xvslti_d(_1, 1); } ++// CHECK-LABEL: @xvslt_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvslt.bu(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvslt_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvslt_bu(_1, _2); } ++// CHECK-LABEL: @xvslt_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvslt.hu(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvslt_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvslt_hu(_1, _2); } ++// CHECK-LABEL: @xvslt_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvslt.wu(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvslt_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvslt_wu(_1, _2); } ++// CHECK-LABEL: @xvslt_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvslt.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvslt_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvslt_du(_1, _2); } ++// CHECK-LABEL: @xvslti_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvslti.bu(<32 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvslti_bu(v32u8 _1) { return __builtin_lasx_xvslti_bu(_1, 1); } ++// CHECK-LABEL: @xvslti_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvslti.hu(<16 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvslti_hu(v16u16 _1) { return __builtin_lasx_xvslti_hu(_1, 1); } ++// CHECK-LABEL: @xvslti_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> 
@llvm.loongarch.lasx.xvslti.wu(<8 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvslti_wu(v8u32 _1) { return __builtin_lasx_xvslti_wu(_1, 1); } ++// CHECK-LABEL: @xvslti_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvslti.du(<4 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvslti_du(v4u64 _1) { return __builtin_lasx_xvslti_du(_1, 1); } ++// CHECK-LABEL: @xvsle_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsle.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvsle_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsle_b(_1, _2); } ++// CHECK-LABEL: @xvsle_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsle.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvsle_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsle_h(_1, _2); } ++// CHECK-LABEL: @xvsle_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsle.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvsle_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsle_w(_1, _2); } ++// CHECK-LABEL: @xvsle_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsle.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvsle_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsle_d(_1, _2); } ++// CHECK-LABEL: @xvslei_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvslei.b(<32 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvslei_b(v32i8 _1) { return __builtin_lasx_xvslei_b(_1, 1); } ++// CHECK-LABEL: @xvslei_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvslei.h(<16 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvslei_h(v16i16 _1) { return __builtin_lasx_xvslei_h(_1, 1); } ++// CHECK-LABEL: @xvslei_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvslei.w(<8 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvslei_w(v8i32 _1) { return __builtin_lasx_xvslei_w(_1, 1); } ++// CHECK-LABEL: @xvslei_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvslei.d(<4 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvslei_d(v4i64 _1) { return __builtin_lasx_xvslei_d(_1, 1); } ++// CHECK-LABEL: @xvsle_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsle.bu(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvsle_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvsle_bu(_1, _2); } ++// CHECK-LABEL: @xvsle_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsle.hu(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvsle_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvsle_hu(_1, _2); } ++// CHECK-LABEL: @xvsle_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsle.wu(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvsle_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvsle_wu(_1, _2); } ++// CHECK-LABEL: 
@xvsle_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsle.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvsle_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvsle_du(_1, _2); } ++// CHECK-LABEL: @xvslei_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvslei.bu(<32 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvslei_bu(v32u8 _1) { return __builtin_lasx_xvslei_bu(_1, 1); } ++// CHECK-LABEL: @xvslei_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvslei.hu(<16 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvslei_hu(v16u16 _1) { return __builtin_lasx_xvslei_hu(_1, 1); } ++// CHECK-LABEL: @xvslei_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvslei.wu(<8 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvslei_wu(v8u32 _1) { return __builtin_lasx_xvslei_wu(_1, 1); } ++// CHECK-LABEL: @xvslei_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvslei.du(<4 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvslei_du(v4u64 _1) { return __builtin_lasx_xvslei_du(_1, 1); } ++// CHECK-LABEL: @xvsat_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsat.b(<32 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvsat_b(v32i8 _1) { return __builtin_lasx_xvsat_b(_1, 1); } ++// CHECK-LABEL: @xvsat_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsat.h(<16 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvsat_h(v16i16 _1) { return __builtin_lasx_xvsat_h(_1, 1); } ++// CHECK-LABEL: @xvsat_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsat.w(<8 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvsat_w(v8i32 _1) { return __builtin_lasx_xvsat_w(_1, 1); } ++// CHECK-LABEL: @xvsat_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsat.d(<4 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvsat_d(v4i64 _1) { return __builtin_lasx_xvsat_d(_1, 1); } ++// CHECK-LABEL: @xvsat_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsat.bu(<32 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32u8 xvsat_bu(v32u8 _1) { return __builtin_lasx_xvsat_bu(_1, 1); } ++// CHECK-LABEL: @xvsat_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsat.hu(<16 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16u16 xvsat_hu(v16u16 _1) { return __builtin_lasx_xvsat_hu(_1, 1); } ++// CHECK-LABEL: @xvsat_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsat.wu(<8 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8u32 xvsat_wu(v8u32 _1) { return __builtin_lasx_xvsat_wu(_1, 1); } ++// CHECK-LABEL: @xvsat_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsat.du(<4 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4u64 xvsat_du(v4u64 _1) { return __builtin_lasx_xvsat_du(_1, 1); } ++// CHECK-LABEL: @xvadda_b( ++// CHECK-NEXT: entry: ++// 
CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvadda.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvadda_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvadda_b(_1, _2); } ++// CHECK-LABEL: @xvadda_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvadda.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvadda_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvadda_h(_1, _2); } ++// CHECK-LABEL: @xvadda_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvadda.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvadda_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvadda_w(_1, _2); } ++// CHECK-LABEL: @xvadda_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvadda.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvadda_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvadda_d(_1, _2); } ++// CHECK-LABEL: @xvsadd_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsadd.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvsadd_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsadd_b(_1, _2); } ++// CHECK-LABEL: @xvsadd_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsadd.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvsadd_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsadd_h(_1, _2); } ++// CHECK-LABEL: @xvsadd_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsadd.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvsadd_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsadd_w(_1, _2); } ++// CHECK-LABEL: @xvsadd_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsadd.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvsadd_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsadd_d(_1, _2); } ++// CHECK-LABEL: @xvsadd_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsadd.bu(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32u8 xvsadd_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvsadd_bu(_1, _2); } ++// CHECK-LABEL: @xvsadd_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsadd.hu(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16u16 xvsadd_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvsadd_hu(_1, _2); } ++// CHECK-LABEL: @xvsadd_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsadd.wu(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8u32 xvsadd_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvsadd_wu(_1, _2); } ++// CHECK-LABEL: @xvsadd_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsadd.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4u64 xvsadd_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvsadd_du(_1, _2); } ++// CHECK-LABEL: @xvavg_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> 
@llvm.loongarch.lasx.xvavg.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvavg_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvavg_b(_1, _2); } ++// CHECK-LABEL: @xvavg_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvavg.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvavg_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvavg_h(_1, _2); } ++// CHECK-LABEL: @xvavg_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvavg.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvavg_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvavg_w(_1, _2); } ++// CHECK-LABEL: @xvavg_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvavg.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvavg_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvavg_d(_1, _2); } ++// CHECK-LABEL: @xvavg_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvavg.bu(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32u8 xvavg_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvavg_bu(_1, _2); } ++// CHECK-LABEL: @xvavg_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvavg.hu(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16u16 xvavg_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvavg_hu(_1, _2); } ++// CHECK-LABEL: @xvavg_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvavg.wu(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8u32 xvavg_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvavg_wu(_1, _2); } ++// CHECK-LABEL: @xvavg_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvavg.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4u64 xvavg_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvavg_du(_1, _2); } ++// CHECK-LABEL: @xvavgr_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvavgr.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvavgr_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvavgr_b(_1, _2); } ++// CHECK-LABEL: @xvavgr_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvavgr.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvavgr_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvavgr_h(_1, _2); } ++// CHECK-LABEL: @xvavgr_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvavgr.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvavgr_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvavgr_w(_1, _2); } ++// CHECK-LABEL: @xvavgr_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvavgr.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvavgr_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvavgr_d(_1, _2); } ++// CHECK-LABEL: @xvavgr_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvavgr.bu(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// 
CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32u8 xvavgr_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvavgr_bu(_1, _2); } ++// CHECK-LABEL: @xvavgr_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvavgr.hu(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16u16 xvavgr_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvavgr_hu(_1, _2); } ++// CHECK-LABEL: @xvavgr_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvavgr.wu(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8u32 xvavgr_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvavgr_wu(_1, _2); } ++// CHECK-LABEL: @xvavgr_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvavgr.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4u64 xvavgr_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvavgr_du(_1, _2); } ++// CHECK-LABEL: @xvssub_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssub.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvssub_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvssub_b(_1, _2); } ++// CHECK-LABEL: @xvssub_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssub.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvssub_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssub_h(_1, _2); } ++// CHECK-LABEL: @xvssub_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssub.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvssub_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssub_w(_1, _2); } ++// CHECK-LABEL: @xvssub_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvssub.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvssub_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssub_d(_1, _2); } ++// CHECK-LABEL: @xvssub_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssub.bu(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32u8 xvssub_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvssub_bu(_1, _2); } ++// CHECK-LABEL: @xvssub_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssub.hu(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16u16 xvssub_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvssub_hu(_1, _2); } ++// CHECK-LABEL: @xvssub_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssub.wu(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8u32 xvssub_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvssub_wu(_1, _2); } ++// CHECK-LABEL: @xvssub_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvssub.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4u64 xvssub_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvssub_du(_1, _2); } ++// CHECK-LABEL: @xvabsd_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvabsd.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// 
++v32i8 xvabsd_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvabsd_b(_1, _2); } ++// CHECK-LABEL: @xvabsd_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvabsd.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvabsd_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvabsd_h(_1, _2); } ++// CHECK-LABEL: @xvabsd_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvabsd.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvabsd_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvabsd_w(_1, _2); } ++// CHECK-LABEL: @xvabsd_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvabsd.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvabsd_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvabsd_d(_1, _2); } ++// CHECK-LABEL: @xvabsd_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvabsd.bu(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32u8 xvabsd_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvabsd_bu(_1, _2); } ++// CHECK-LABEL: @xvabsd_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvabsd.hu(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16u16 xvabsd_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvabsd_hu(_1, _2); } ++// CHECK-LABEL: @xvabsd_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvabsd.wu(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8u32 xvabsd_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvabsd_wu(_1, _2); } ++// CHECK-LABEL: @xvabsd_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvabsd.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4u64 xvabsd_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvabsd_du(_1, _2); } ++// CHECK-LABEL: @xvmul_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmul.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvmul_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmul_b(_1, _2); } ++// CHECK-LABEL: @xvmul_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmul.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvmul_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmul_h(_1, _2); } ++// CHECK-LABEL: @xvmul_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmul.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvmul_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmul_w(_1, _2); } ++// CHECK-LABEL: @xvmul_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmul.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvmul_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmul_d(_1, _2); } ++// CHECK-LABEL: @xvmadd_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmadd.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*, <32 x i8> _3:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvmadd_b(v32i8 _1, v32i8 _2, v32i8 _3) 
{ return __builtin_lasx_xvmadd_b(_1, _2, _3); } ++// CHECK-LABEL: @xvmadd_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmadd.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*, <16 x i16> _3:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvmadd_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvmadd_h(_1, _2, _3); } ++// CHECK-LABEL: @xvmadd_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmadd.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*, <8 x i32> _3:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvmadd_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __builtin_lasx_xvmadd_w(_1, _2, _3); } ++// CHECK-LABEL: @xvmadd_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmadd.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*, <4 x i64> _3:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvmadd_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __builtin_lasx_xvmadd_d(_1, _2, _3); } ++// CHECK-LABEL: @xvmsub_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmsub.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*, <32 x i8> _3:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvmsub_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvmsub_b(_1, _2, _3); } ++// CHECK-LABEL: @xvmsub_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmsub.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*, <16 x i16> _3:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvmsub_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvmsub_h(_1, _2, _3); } ++// CHECK-LABEL: @xvmsub_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmsub.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*, <8 x i32> _3:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvmsub_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __builtin_lasx_xvmsub_w(_1, _2, _3); } ++// CHECK-LABEL: @xvmsub_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmsub.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*, <4 x i64> _3:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvmsub_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __builtin_lasx_xvmsub_d(_1, _2, _3); } ++// CHECK-LABEL: @xvdiv_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvdiv.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvdiv_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvdiv_b(_1, _2); } ++// CHECK-LABEL: @xvdiv_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvdiv.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvdiv_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvdiv_h(_1, _2); } ++// CHECK-LABEL: @xvdiv_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvdiv.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvdiv_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvdiv_w(_1, _2); } ++// CHECK-LABEL: @xvdiv_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvdiv.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvdiv_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvdiv_d(_1, _2); } ++// CHECK-LABEL: @xvdiv_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 
x i8> @llvm.loongarch.lasx.xvdiv.bu(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32u8 xvdiv_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvdiv_bu(_1, _2); } ++// CHECK-LABEL: @xvdiv_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvdiv.hu(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16u16 xvdiv_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvdiv_hu(_1, _2); } ++// CHECK-LABEL: @xvdiv_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvdiv.wu(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8u32 xvdiv_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvdiv_wu(_1, _2); } ++// CHECK-LABEL: @xvdiv_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvdiv.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4u64 xvdiv_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvdiv_du(_1, _2); } ++// CHECK-LABEL: @xvhaddw_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvhaddw.h.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvhaddw_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvhaddw_h_b(_1, _2); } ++// CHECK-LABEL: @xvhaddw_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvhaddw.w.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvhaddw_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvhaddw_w_h(_1, _2); } ++// CHECK-LABEL: @xvhaddw_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.d.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvhaddw_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvhaddw_d_w(_1, _2); } ++// CHECK-LABEL: @xvhaddw_hu_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvhaddw.hu.bu(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16u16 xvhaddw_hu_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvhaddw_hu_bu(_1, _2); } ++// CHECK-LABEL: @xvhaddw_wu_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvhaddw.wu.hu(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8u32 xvhaddw_wu_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvhaddw_wu_hu(_1, _2); } ++// CHECK-LABEL: @xvhaddw_du_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.du.wu(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4u64 xvhaddw_du_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvhaddw_du_wu(_1, _2); } ++// CHECK-LABEL: @xvhsubw_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvhsubw.h.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvhsubw_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvhsubw_h_b(_1, _2); } ++// CHECK-LABEL: @xvhsubw_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvhsubw.w.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvhsubw_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvhsubw_w_h(_1, _2); } ++// CHECK-LABEL: @xvhsubw_d_w( 
++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.d.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvhsubw_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvhsubw_d_w(_1, _2); } ++// CHECK-LABEL: @xvhsubw_hu_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvhsubw.hu.bu(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvhsubw_hu_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvhsubw_hu_bu(_1, _2); } ++// CHECK-LABEL: @xvhsubw_wu_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvhsubw.wu.hu(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvhsubw_wu_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvhsubw_wu_hu(_1, _2); } ++// CHECK-LABEL: @xvhsubw_du_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.du.wu(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvhsubw_du_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvhsubw_du_wu(_1, _2); } ++// CHECK-LABEL: @xvmod_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmod.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvmod_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmod_b(_1, _2); } ++// CHECK-LABEL: @xvmod_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmod.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvmod_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmod_h(_1, _2); } ++// CHECK-LABEL: @xvmod_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmod.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvmod_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmod_w(_1, _2); } ++// CHECK-LABEL: @xvmod_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmod.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvmod_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmod_d(_1, _2); } ++// CHECK-LABEL: @xvmod_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmod.bu(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32u8 xvmod_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmod_bu(_1, _2); } ++// CHECK-LABEL: @xvmod_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmod.hu(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16u16 xvmod_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmod_hu(_1, _2); } ++// CHECK-LABEL: @xvmod_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmod.wu(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8u32 xvmod_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmod_wu(_1, _2); } ++// CHECK-LABEL: @xvmod_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmod.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4u64 xvmod_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmod_du(_1, _2); } ++// CHECK-LABEL: @xvrepl128vei_b( ++// 
CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b(<32 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvrepl128vei_b(v32i8 _1) { return __builtin_lasx_xvrepl128vei_b(_1, 1); } ++// CHECK-LABEL: @xvrepl128vei_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h(<16 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvrepl128vei_h(v16i16 _1) { return __builtin_lasx_xvrepl128vei_h(_1, 1); } ++// CHECK-LABEL: @xvrepl128vei_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w(<8 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvrepl128vei_w(v8i32 _1) { return __builtin_lasx_xvrepl128vei_w(_1, 1); } ++// CHECK-LABEL: @xvrepl128vei_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d(<4 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvrepl128vei_d(v4i64 _1) { return __builtin_lasx_xvrepl128vei_d(_1, 1); } ++// CHECK-LABEL: @xvpickev_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvpickev.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvpickev_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvpickev_b(_1, _2); } ++// CHECK-LABEL: @xvpickev_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvpickev.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvpickev_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvpickev_h(_1, _2); } ++// CHECK-LABEL: @xvpickev_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvpickev.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvpickev_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvpickev_w(_1, _2); } ++// CHECK-LABEL: @xvpickev_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvpickev.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvpickev_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvpickev_d(_1, _2); } ++// CHECK-LABEL: @xvpickod_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvpickod.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvpickod_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvpickod_b(_1, _2); } ++// CHECK-LABEL: @xvpickod_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvpickod.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvpickod_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvpickod_h(_1, _2); } ++// CHECK-LABEL: @xvpickod_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvpickod.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvpickod_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvpickod_w(_1, _2); } ++// CHECK-LABEL: @xvpickod_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvpickod.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvpickod_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvpickod_d(_1, _2); } ++// CHECK-LABEL: @xvilvh_b( ++// 
CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvilvh.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvilvh_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvilvh_b(_1, _2); } ++// CHECK-LABEL: @xvilvh_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvilvh.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvilvh_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvilvh_h(_1, _2); } ++// CHECK-LABEL: @xvilvh_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvilvh.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvilvh_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvilvh_w(_1, _2); } ++// CHECK-LABEL: @xvilvh_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvilvh.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvilvh_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvilvh_d(_1, _2); } ++// CHECK-LABEL: @xvilvl_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvilvl.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvilvl_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvilvl_b(_1, _2); } ++// CHECK-LABEL: @xvilvl_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvilvl.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvilvl_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvilvl_h(_1, _2); } ++// CHECK-LABEL: @xvilvl_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvilvl.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvilvl_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvilvl_w(_1, _2); } ++// CHECK-LABEL: @xvilvl_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvilvl.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvilvl_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvilvl_d(_1, _2); } ++// CHECK-LABEL: @xvpackev_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvpackev.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvpackev_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvpackev_b(_1, _2); } ++// CHECK-LABEL: @xvpackev_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvpackev.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvpackev_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvpackev_h(_1, _2); } ++// CHECK-LABEL: @xvpackev_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvpackev.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvpackev_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvpackev_w(_1, _2); } ++// CHECK-LABEL: @xvpackev_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvpackev.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvpackev_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvpackev_d(_1, _2); } ++// CHECK-LABEL: @xvpackod_b( ++// CHECK-NEXT: entry: ++// 
CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvpackod.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvpackod_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvpackod_b(_1, _2); } ++// CHECK-LABEL: @xvpackod_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvpackod.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvpackod_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvpackod_h(_1, _2); } ++// CHECK-LABEL: @xvpackod_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvpackod.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvpackod_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvpackod_w(_1, _2); } ++// CHECK-LABEL: @xvpackod_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvpackod.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvpackod_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvpackod_d(_1, _2); } ++// CHECK-LABEL: @xvshuf_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvshuf.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*, <32 x i8> _3:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvshuf_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvshuf_b(_1, _2, _3); } ++// CHECK-LABEL: @xvshuf_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvshuf.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*, <16 x i16> _3:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvshuf_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvshuf_h(_1, _2, _3); } ++// CHECK-LABEL: @xvshuf_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvshuf.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*, <8 x i32> _3:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvshuf_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __builtin_lasx_xvshuf_w(_1, _2, _3); } ++// CHECK-LABEL: @xvshuf_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvshuf.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*, <4 x i64> _3:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvshuf_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __builtin_lasx_xvshuf_d(_1, _2, _3); } ++// CHECK-LABEL: @xvand_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvand.v(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32u8 xvand_v(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvand_v(_1, _2); } ++// CHECK-LABEL: @xvandi_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvandi.b(<32 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32u8 xvandi_b(v32u8 _1) { return __builtin_lasx_xvandi_b(_1, 1); } ++// CHECK-LABEL: @xvor_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvor.v(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32u8 xvor_v(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvor_v(_1, _2); } ++// CHECK-LABEL: @xvori_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvori.b(<32 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32u8 xvori_b(v32u8 _1) { return __builtin_lasx_xvori_b(_1, 1); } ++// CHECK-LABEL: @xvnor_v( 
++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvnor.v(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32u8 xvnor_v(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvnor_v(_1, _2); } ++// CHECK-LABEL: @xvnori_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvnori.b(<32 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32u8 xvnori_b(v32u8 _1) { return __builtin_lasx_xvnori_b(_1, 1); } ++// CHECK-LABEL: @xvxor_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvxor.v(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32u8 xvxor_v(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvxor_v(_1, _2); } ++// CHECK-LABEL: @xvxori_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvxori.b(<32 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32u8 xvxori_b(v32u8 _1) { return __builtin_lasx_xvxori_b(_1, 1); } ++// CHECK-LABEL: @xvbitsel_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvbitsel.v(<32 x i8> _1:%.*, <32 x i8> _2:%.*, <32 x i8> _3:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32u8 xvbitsel_v(v32u8 _1, v32u8 _2, v32u8 _3) { return __builtin_lasx_xvbitsel_v(_1, _2, _3); } ++// CHECK-LABEL: @xvbitseli_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvbitseli.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32u8 xvbitseli_b(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvbitseli_b(_1, _2, 1); } ++// CHECK-LABEL: @xvshuf4i_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b(<32 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvshuf4i_b(v32i8 _1) { return __builtin_lasx_xvshuf4i_b(_1, 1); } ++// CHECK-LABEL: @xvshuf4i_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h(<16 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvshuf4i_h(v16i16 _1) { return __builtin_lasx_xvshuf4i_h(_1, 1); } ++// CHECK-LABEL: @xvshuf4i_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w(<8 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvshuf4i_w(v8i32 _1) { return __builtin_lasx_xvshuf4i_w(_1, 1); } ++// CHECK-LABEL: @xvreplgr2vr_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvreplgr2vr.b(i32 _1:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvreplgr2vr_b(int _1) { return __builtin_lasx_xvreplgr2vr_b(_1); } ++// CHECK-LABEL: @xvreplgr2vr_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvreplgr2vr.h(i32 _1:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvreplgr2vr_h(int _1) { return __builtin_lasx_xvreplgr2vr_h(_1); } ++// CHECK-LABEL: @xvreplgr2vr_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvreplgr2vr.w(i32 _1:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvreplgr2vr_w(int _1) { return __builtin_lasx_xvreplgr2vr_w(_1); } ++// CHECK-LABEL: @xvreplgr2vr_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: CONV:%.* = sext i32 _1:%.* to i64 ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvreplgr2vr.d(i64 
CONV) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvreplgr2vr_d(int _1) { return __builtin_lasx_xvreplgr2vr_d(_1); } ++// CHECK-LABEL: @xvpcnt_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvpcnt.b(<32 x i8> _1:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvpcnt_b(v32i8 _1) { return __builtin_lasx_xvpcnt_b(_1); } ++// CHECK-LABEL: @xvpcnt_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvpcnt.h(<16 x i16> _1:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvpcnt_h(v16i16 _1) { return __builtin_lasx_xvpcnt_h(_1); } ++// CHECK-LABEL: @xvpcnt_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvpcnt.w(<8 x i32> _1:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvpcnt_w(v8i32 _1) { return __builtin_lasx_xvpcnt_w(_1); } ++// CHECK-LABEL: @xvpcnt_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvpcnt.d(<4 x i64> _1:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvpcnt_d(v4i64 _1) { return __builtin_lasx_xvpcnt_d(_1); } ++// CHECK-LABEL: @xvclo_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvclo.b(<32 x i8> _1:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvclo_b(v32i8 _1) { return __builtin_lasx_xvclo_b(_1); } ++// CHECK-LABEL: @xvclo_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvclo.h(<16 x i16> _1:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvclo_h(v16i16 _1) { return __builtin_lasx_xvclo_h(_1); } ++// CHECK-LABEL: @xvclo_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvclo.w(<8 x i32> _1:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvclo_w(v8i32 _1) { return __builtin_lasx_xvclo_w(_1); } ++// CHECK-LABEL: @xvclo_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvclo.d(<4 x i64> _1:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvclo_d(v4i64 _1) { return __builtin_lasx_xvclo_d(_1); } ++// CHECK-LABEL: @xvclz_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvclz.b(<32 x i8> _1:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvclz_b(v32i8 _1) { return __builtin_lasx_xvclz_b(_1); } ++// CHECK-LABEL: @xvclz_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvclz.h(<16 x i16> _1:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvclz_h(v16i16 _1) { return __builtin_lasx_xvclz_h(_1); } ++// CHECK-LABEL: @xvclz_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvclz.w(<8 x i32> _1:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvclz_w(v8i32 _1) { return __builtin_lasx_xvclz_w(_1); } ++// CHECK-LABEL: @xvclz_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvclz.d(<4 x i64> _1:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvclz_d(v4i64 _1) { return __builtin_lasx_xvclz_d(_1); } ++// CHECK-LABEL: @xvfadd_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfadd.s(<8 x float> _1:%.*, <8 x float> _2:%.*) ++// CHECK-NEXT: ret <8 x float> TMP0 ++// ++v8f32 xvfadd_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfadd_s(_1, _2); } ++// CHECK-LABEL: @xvfadd_d( ++// CHECK-NEXT: entry: ++// 
CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfadd.d(<4 x double> _1:%.*, <4 x double> _2:%.*) ++// CHECK-NEXT: ret <4 x double> TMP0 ++// ++v4f64 xvfadd_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfadd_d(_1, _2); } ++// CHECK-LABEL: @xvfsub_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfsub.s(<8 x float> _1:%.*, <8 x float> _2:%.*) ++// CHECK-NEXT: ret <8 x float> TMP0 ++// ++v8f32 xvfsub_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfsub_s(_1, _2); } ++// CHECK-LABEL: @xvfsub_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfsub.d(<4 x double> _1:%.*, <4 x double> _2:%.*) ++// CHECK-NEXT: ret <4 x double> TMP0 ++// ++v4f64 xvfsub_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfsub_d(_1, _2); } ++// CHECK-LABEL: @xvfmul_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfmul.s(<8 x float> _1:%.*, <8 x float> _2:%.*) ++// CHECK-NEXT: ret <8 x float> TMP0 ++// ++v8f32 xvfmul_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfmul_s(_1, _2); } ++// CHECK-LABEL: @xvfmul_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfmul.d(<4 x double> _1:%.*, <4 x double> _2:%.*) ++// CHECK-NEXT: ret <4 x double> TMP0 ++// ++v4f64 xvfmul_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfmul_d(_1, _2); } ++// CHECK-LABEL: @xvfdiv_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfdiv.s(<8 x float> _1:%.*, <8 x float> _2:%.*) ++// CHECK-NEXT: ret <8 x float> TMP0 ++// ++v8f32 xvfdiv_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfdiv_s(_1, _2); } ++// CHECK-LABEL: @xvfdiv_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfdiv.d(<4 x double> _1:%.*, <4 x double> _2:%.*) ++// CHECK-NEXT: ret <4 x double> TMP0 ++// ++v4f64 xvfdiv_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfdiv_d(_1, _2); } ++// CHECK-LABEL: @xvfcvt_h_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvfcvt.h.s(<8 x float> _1:%.*, <8 x float> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvfcvt_h_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcvt_h_s(_1, _2); } ++// CHECK-LABEL: @xvfcvt_s_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfcvt.s.d(<4 x double> _1:%.*, <4 x double> _2:%.*) ++// CHECK-NEXT: ret <8 x float> TMP0 ++// ++v8f32 xvfcvt_s_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcvt_s_d(_1, _2); } ++// CHECK-LABEL: @xvfmin_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfmin.s(<8 x float> _1:%.*, <8 x float> _2:%.*) ++// CHECK-NEXT: ret <8 x float> TMP0 ++// ++v8f32 xvfmin_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfmin_s(_1, _2); } ++// CHECK-LABEL: @xvfmin_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfmin.d(<4 x double> _1:%.*, <4 x double> _2:%.*) ++// CHECK-NEXT: ret <4 x double> TMP0 ++// ++v4f64 xvfmin_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfmin_d(_1, _2); } ++// CHECK-LABEL: @xvfmina_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfmina.s(<8 x float> _1:%.*, <8 x float> _2:%.*) ++// CHECK-NEXT: ret <8 x float> TMP0 ++// ++v8f32 xvfmina_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfmina_s(_1, _2); } ++// 
CHECK-LABEL: @xvfmina_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfmina.d(<4 x double> _1:%.*, <4 x double> _2:%.*) ++// CHECK-NEXT: ret <4 x double> TMP0 ++// ++v4f64 xvfmina_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfmina_d(_1, _2); } ++// CHECK-LABEL: @xvfmax_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfmax.s(<8 x float> _1:%.*, <8 x float> _2:%.*) ++// CHECK-NEXT: ret <8 x float> TMP0 ++// ++v8f32 xvfmax_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfmax_s(_1, _2); } ++// CHECK-LABEL: @xvfmax_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfmax.d(<4 x double> _1:%.*, <4 x double> _2:%.*) ++// CHECK-NEXT: ret <4 x double> TMP0 ++// ++v4f64 xvfmax_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfmax_d(_1, _2); } ++// CHECK-LABEL: @xvfmaxa_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfmaxa.s(<8 x float> _1:%.*, <8 x float> _2:%.*) ++// CHECK-NEXT: ret <8 x float> TMP0 ++// ++v8f32 xvfmaxa_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfmaxa_s(_1, _2); } ++// CHECK-LABEL: @xvfmaxa_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfmaxa.d(<4 x double> _1:%.*, <4 x double> _2:%.*) ++// CHECK-NEXT: ret <4 x double> TMP0 ++// ++v4f64 xvfmaxa_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfmaxa_d(_1, _2); } ++// CHECK-LABEL: @xvfclass_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfclass.s(<8 x float> _1:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvfclass_s(v8f32 _1) { return __builtin_lasx_xvfclass_s(_1); } ++// CHECK-LABEL: @xvfclass_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfclass.d(<4 x double> _1:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvfclass_d(v4f64 _1) { return __builtin_lasx_xvfclass_d(_1); } ++// CHECK-LABEL: @xvfsqrt_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfsqrt.s(<8 x float> _1:%.*) ++// CHECK-NEXT: ret <8 x float> TMP0 ++// ++v8f32 xvfsqrt_s(v8f32 _1) { return __builtin_lasx_xvfsqrt_s(_1); } ++// CHECK-LABEL: @xvfsqrt_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfsqrt.d(<4 x double> _1:%.*) ++// CHECK-NEXT: ret <4 x double> TMP0 ++// ++v4f64 xvfsqrt_d(v4f64 _1) { return __builtin_lasx_xvfsqrt_d(_1); } ++// CHECK-LABEL: @xvfrecip_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfrecip.s(<8 x float> _1:%.*) ++// CHECK-NEXT: ret <8 x float> TMP0 ++// ++v8f32 xvfrecip_s(v8f32 _1) { return __builtin_lasx_xvfrecip_s(_1); } ++// CHECK-LABEL: @xvfrecip_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfrecip.d(<4 x double> _1:%.*) ++// CHECK-NEXT: ret <4 x double> TMP0 ++// ++v4f64 xvfrecip_d(v4f64 _1) { return __builtin_lasx_xvfrecip_d(_1); } ++// CHECK-LABEL: @xvfrint_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfrint.s(<8 x float> _1:%.*) ++// CHECK-NEXT: ret <8 x float> TMP0 ++// ++v8f32 xvfrint_s(v8f32 _1) { return __builtin_lasx_xvfrint_s(_1); } ++// CHECK-LABEL: @xvfrint_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfrint.d(<4 x double> _1:%.*) ++// 
CHECK-NEXT: ret <4 x double> TMP0 ++// ++v4f64 xvfrint_d(v4f64 _1) { return __builtin_lasx_xvfrint_d(_1); } ++// CHECK-LABEL: @xvfrsqrt_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfrsqrt.s(<8 x float> _1:%.*) ++// CHECK-NEXT: ret <8 x float> TMP0 ++// ++v8f32 xvfrsqrt_s(v8f32 _1) { return __builtin_lasx_xvfrsqrt_s(_1); } ++// CHECK-LABEL: @xvfrsqrt_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfrsqrt.d(<4 x double> _1:%.*) ++// CHECK-NEXT: ret <4 x double> TMP0 ++// ++v4f64 xvfrsqrt_d(v4f64 _1) { return __builtin_lasx_xvfrsqrt_d(_1); } ++// CHECK-LABEL: @xvflogb_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvflogb.s(<8 x float> _1:%.*) ++// CHECK-NEXT: ret <8 x float> TMP0 ++// ++v8f32 xvflogb_s(v8f32 _1) { return __builtin_lasx_xvflogb_s(_1); } ++// CHECK-LABEL: @xvflogb_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvflogb.d(<4 x double> _1:%.*) ++// CHECK-NEXT: ret <4 x double> TMP0 ++// ++v4f64 xvflogb_d(v4f64 _1) { return __builtin_lasx_xvflogb_d(_1); } ++// CHECK-LABEL: @xvfcvth_s_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfcvth.s.h(<16 x i16> _1:%.*) ++// CHECK-NEXT: ret <8 x float> TMP0 ++// ++v8f32 xvfcvth_s_h(v16i16 _1) { return __builtin_lasx_xvfcvth_s_h(_1); } ++// CHECK-LABEL: @xvfcvth_d_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfcvth.d.s(<8 x float> _1:%.*) ++// CHECK-NEXT: ret <4 x double> TMP0 ++// ++v4f64 xvfcvth_d_s(v8f32 _1) { return __builtin_lasx_xvfcvth_d_s(_1); } ++// CHECK-LABEL: @xvfcvtl_s_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfcvtl.s.h(<16 x i16> _1:%.*) ++// CHECK-NEXT: ret <8 x float> TMP0 ++// ++v8f32 xvfcvtl_s_h(v16i16 _1) { return __builtin_lasx_xvfcvtl_s_h(_1); } ++// CHECK-LABEL: @xvfcvtl_d_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfcvtl.d.s(<8 x float> _1:%.*) ++// CHECK-NEXT: ret <4 x double> TMP0 ++// ++v4f64 xvfcvtl_d_s(v8f32 _1) { return __builtin_lasx_xvfcvtl_d_s(_1); } ++// CHECK-LABEL: @xvftint_w_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.w.s(<8 x float> _1:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvftint_w_s(v8f32 _1) { return __builtin_lasx_xvftint_w_s(_1); } ++// CHECK-LABEL: @xvftint_l_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftint.l.d(<4 x double> _1:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvftint_l_d(v4f64 _1) { return __builtin_lasx_xvftint_l_d(_1); } ++// CHECK-LABEL: @xvftint_wu_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.wu.s(<8 x float> _1:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8u32 xvftint_wu_s(v8f32 _1) { return __builtin_lasx_xvftint_wu_s(_1); } ++// CHECK-LABEL: @xvftint_lu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftint.lu.d(<4 x double> _1:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4u64 xvftint_lu_d(v4f64 _1) { return __builtin_lasx_xvftint_lu_d(_1); } ++// CHECK-LABEL: @xvftintrz_w_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.s(<8 x float> _1:%.*) ++// 
CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvftintrz_w_s(v8f32 _1) { return __builtin_lasx_xvftintrz_w_s(_1); } ++// CHECK-LABEL: @xvftintrz_l_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrz.l.d(<4 x double> _1:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvftintrz_l_d(v4f64 _1) { return __builtin_lasx_xvftintrz_l_d(_1); } ++// CHECK-LABEL: @xvftintrz_wu_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.wu.s(<8 x float> _1:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8u32 xvftintrz_wu_s(v8f32 _1) { return __builtin_lasx_xvftintrz_wu_s(_1); } ++// CHECK-LABEL: @xvftintrz_lu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrz.lu.d(<4 x double> _1:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4u64 xvftintrz_lu_d(v4f64 _1) { return __builtin_lasx_xvftintrz_lu_d(_1); } ++// CHECK-LABEL: @xvffint_s_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.w(<8 x i32> _1:%.*) ++// CHECK-NEXT: ret <8 x float> TMP0 ++// ++v8f32 xvffint_s_w(v8i32 _1) { return __builtin_lasx_xvffint_s_w(_1); } ++// CHECK-LABEL: @xvffint_d_l( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvffint.d.l(<4 x i64> _1:%.*) ++// CHECK-NEXT: ret <4 x double> TMP0 ++// ++v4f64 xvffint_d_l(v4i64 _1) { return __builtin_lasx_xvffint_d_l(_1); } ++// CHECK-LABEL: @xvffint_s_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.wu(<8 x i32> _1:%.*) ++// CHECK-NEXT: ret <8 x float> TMP0 ++// ++v8f32 xvffint_s_wu(v8u32 _1) { return __builtin_lasx_xvffint_s_wu(_1); } ++// CHECK-LABEL: @xvffint_d_lu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvffint.d.lu(<4 x i64> _1:%.*) ++// CHECK-NEXT: ret <4 x double> TMP0 ++// ++v4f64 xvffint_d_lu(v4u64 _1) { return __builtin_lasx_xvffint_d_lu(_1); } ++// CHECK-LABEL: @xvreplve_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve.b(<32 x i8> _1:%.*, i32 _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvreplve_b(v32i8 _1, int _2) { return __builtin_lasx_xvreplve_b(_1, _2); } ++// CHECK-LABEL: @xvreplve_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvreplve.h(<16 x i16> _1:%.*, i32 _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvreplve_h(v16i16 _1, int _2) { return __builtin_lasx_xvreplve_h(_1, _2); } ++// CHECK-LABEL: @xvreplve_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvreplve.w(<8 x i32> _1:%.*, i32 _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvreplve_w(v8i32 _1, int _2) { return __builtin_lasx_xvreplve_w(_1, _2); } ++// CHECK-LABEL: @xvreplve_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvreplve.d(<4 x i64> _1:%.*, i32 _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvreplve_d(v4i64 _1, int _2) { return __builtin_lasx_xvreplve_d(_1, _2); } ++// CHECK-LABEL: @xvpermi_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvpermi.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvpermi_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvpermi_w(_1, _2, 1); } ++// CHECK-LABEL: 
@xvandn_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvandn.v(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32u8 xvandn_v(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvandn_v(_1, _2); } ++// CHECK-LABEL: @xvneg_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvneg.b(<32 x i8> _1:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvneg_b(v32i8 _1) { return __builtin_lasx_xvneg_b(_1); } ++// CHECK-LABEL: @xvneg_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvneg.h(<16 x i16> _1:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvneg_h(v16i16 _1) { return __builtin_lasx_xvneg_h(_1); } ++// CHECK-LABEL: @xvneg_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvneg.w(<8 x i32> _1:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvneg_w(v8i32 _1) { return __builtin_lasx_xvneg_w(_1); } ++// CHECK-LABEL: @xvneg_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvneg.d(<4 x i64> _1:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvneg_d(v4i64 _1) { return __builtin_lasx_xvneg_d(_1); } ++// CHECK-LABEL: @xvmuh_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmuh.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvmuh_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmuh_b(_1, _2); } ++// CHECK-LABEL: @xvmuh_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmuh.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvmuh_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmuh_h(_1, _2); } ++// CHECK-LABEL: @xvmuh_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmuh.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvmuh_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmuh_w(_1, _2); } ++// CHECK-LABEL: @xvmuh_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmuh.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvmuh_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmuh_d(_1, _2); } ++// CHECK-LABEL: @xvmuh_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmuh.bu(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32u8 xvmuh_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmuh_bu(_1, _2); } ++// CHECK-LABEL: @xvmuh_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmuh.hu(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16u16 xvmuh_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmuh_hu(_1, _2); } ++// CHECK-LABEL: @xvmuh_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmuh.wu(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8u32 xvmuh_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmuh_wu(_1, _2); } ++// CHECK-LABEL: @xvmuh_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmuh.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4u64 xvmuh_du(v4u64 _1, v4u64 _2) 
{ return __builtin_lasx_xvmuh_du(_1, _2); } ++// CHECK-LABEL: @xvsllwil_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b(<32 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvsllwil_h_b(v32i8 _1) { return __builtin_lasx_xvsllwil_h_b(_1, 1); } ++// CHECK-LABEL: @xvsllwil_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h(<16 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvsllwil_w_h(v16i16 _1) { return __builtin_lasx_xvsllwil_w_h(_1, 1); } ++// CHECK-LABEL: @xvsllwil_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsllwil.d.w(<8 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvsllwil_d_w(v8i32 _1) { return __builtin_lasx_xvsllwil_d_w(_1, 1); } ++// CHECK-LABEL: @xvsllwil_hu_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu(<32 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16u16 xvsllwil_hu_bu(v32u8 _1) { return __builtin_lasx_xvsllwil_hu_bu(_1, 1); } ++// CHECK-LABEL: @xvsllwil_wu_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu(<16 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8u32 xvsllwil_wu_hu(v16u16 _1) { return __builtin_lasx_xvsllwil_wu_hu(_1, 1); } ++// CHECK-LABEL: @xvsllwil_du_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu(<8 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4u64 xvsllwil_du_wu(v8u32 _1) { return __builtin_lasx_xvsllwil_du_wu(_1, 1); } ++// CHECK-LABEL: @xvsran_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsran.b.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvsran_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsran_b_h(_1, _2); } ++// CHECK-LABEL: @xvsran_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsran.h.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvsran_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsran_h_w(_1, _2); } ++// CHECK-LABEL: @xvsran_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsran.w.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvsran_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsran_w_d(_1, _2); } ++// CHECK-LABEL: @xvssran_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssran.b.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvssran_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssran_b_h(_1, _2); } ++// CHECK-LABEL: @xvssran_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssran.h.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvssran_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssran_h_w(_1, _2); } ++// CHECK-LABEL: @xvssran_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssran.w.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvssran_w_d(v4i64 _1, v4i64 _2) { return 
__builtin_lasx_xvssran_w_d(_1, _2); } ++// CHECK-LABEL: @xvssran_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssran.bu.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32u8 xvssran_bu_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvssran_bu_h(_1, _2); } ++// CHECK-LABEL: @xvssran_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssran.hu.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16u16 xvssran_hu_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvssran_hu_w(_1, _2); } ++// CHECK-LABEL: @xvssran_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssran.wu.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8u32 xvssran_wu_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvssran_wu_d(_1, _2); } ++// CHECK-LABEL: @xvsrarn_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsrarn.b.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvsrarn_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrarn_b_h(_1, _2); } ++// CHECK-LABEL: @xvsrarn_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsrarn.h.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvsrarn_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrarn_h_w(_1, _2); } ++// CHECK-LABEL: @xvsrarn_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsrarn.w.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvsrarn_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrarn_w_d(_1, _2); } ++// CHECK-LABEL: @xvssrarn_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarn.b.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvssrarn_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrarn_b_h(_1, _2); } ++// CHECK-LABEL: @xvssrarn_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarn.h.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvssrarn_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrarn_h_w(_1, _2); } ++// CHECK-LABEL: @xvssrarn_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarn.w.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvssrarn_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrarn_w_d(_1, _2); } ++// CHECK-LABEL: @xvssrarn_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarn.bu.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32u8 xvssrarn_bu_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvssrarn_bu_h(_1, _2); } ++// CHECK-LABEL: @xvssrarn_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarn.hu.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16u16 xvssrarn_hu_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvssrarn_hu_w(_1, _2); } ++// CHECK-LABEL: @xvssrarn_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> 
@llvm.loongarch.lasx.xvssrarn.wu.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8u32 xvssrarn_wu_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvssrarn_wu_d(_1, _2); } ++// CHECK-LABEL: @xvsrln_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsrln.b.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvsrln_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrln_b_h(_1, _2); } ++// CHECK-LABEL: @xvsrln_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsrln.h.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvsrln_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrln_h_w(_1, _2); } ++// CHECK-LABEL: @xvsrln_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsrln.w.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvsrln_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrln_w_d(_1, _2); } ++// CHECK-LABEL: @xvssrln_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssrln.bu.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32u8 xvssrln_bu_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvssrln_bu_h(_1, _2); } ++// CHECK-LABEL: @xvssrln_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssrln.hu.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16u16 xvssrln_hu_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvssrln_hu_w(_1, _2); } ++// CHECK-LABEL: @xvssrln_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssrln.wu.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8u32 xvssrln_wu_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvssrln_wu_d(_1, _2); } ++// CHECK-LABEL: @xvsrlrn_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlrn.b.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvsrlrn_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrlrn_b_h(_1, _2); } ++// CHECK-LABEL: @xvsrlrn_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlrn.h.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvsrlrn_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrlrn_h_w(_1, _2); } ++// CHECK-LABEL: @xvsrlrn_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlrn.w.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvsrlrn_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrlrn_w_d(_1, _2); } ++// CHECK-LABEL: @xvssrlrn_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.bu.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32u8 xvssrlrn_bu_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvssrlrn_bu_h(_1, _2); } ++// CHECK-LABEL: @xvssrlrn_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.hu.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16u16 xvssrlrn_hu_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvssrlrn_hu_w(_1, 
_2); } ++// CHECK-LABEL: @xvssrlrn_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.wu.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8u32 xvssrlrn_wu_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvssrlrn_wu_d(_1, _2); } ++// CHECK-LABEL: @xvfrstpi_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvfrstpi_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvfrstpi_b(_1, _2, 1); } ++// CHECK-LABEL: @xvfrstpi_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvfrstpi.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvfrstpi_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvfrstpi_h(_1, _2, 1); } ++// CHECK-LABEL: @xvfrstp_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvfrstp.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*, <32 x i8> _3:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvfrstp_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvfrstp_b(_1, _2, _3); } ++// CHECK-LABEL: @xvfrstp_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvfrstp.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*, <16 x i16> _3:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvfrstp_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvfrstp_h(_1, _2, _3); } ++// CHECK-LABEL: @xvshuf4i_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvshuf4i_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvshuf4i_d(_1, _2, 1); } ++// CHECK-LABEL: @xvbsrl_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvbsrl.v(<32 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvbsrl_v(v32i8 _1) { return __builtin_lasx_xvbsrl_v(_1, 1); } ++// CHECK-LABEL: @xvbsll_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvbsll.v(<32 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvbsll_v(v32i8 _1) { return __builtin_lasx_xvbsll_v(_1, 1); } ++// CHECK-LABEL: @xvextrins_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvextrins.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvextrins_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvextrins_b(_1, _2, 1); } ++// CHECK-LABEL: @xvextrins_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvextrins.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvextrins_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvextrins_h(_1, _2, 1); } ++// CHECK-LABEL: @xvextrins_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvextrins.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvextrins_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvextrins_w(_1, _2, 1); } ++// CHECK-LABEL: @xvextrins_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvextrins.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*, i32 
1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvextrins_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvextrins_d(_1, _2, 1); } ++// CHECK-LABEL: @xvmskltz_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmskltz.b(<32 x i8> _1:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvmskltz_b(v32i8 _1) { return __builtin_lasx_xvmskltz_b(_1); } ++// CHECK-LABEL: @xvmskltz_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmskltz.h(<16 x i16> _1:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvmskltz_h(v16i16 _1) { return __builtin_lasx_xvmskltz_h(_1); } ++// CHECK-LABEL: @xvmskltz_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmskltz.w(<8 x i32> _1:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvmskltz_w(v8i32 _1) { return __builtin_lasx_xvmskltz_w(_1); } ++// CHECK-LABEL: @xvmskltz_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmskltz.d(<4 x i64> _1:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvmskltz_d(v4i64 _1) { return __builtin_lasx_xvmskltz_d(_1); } ++// CHECK-LABEL: @xvsigncov_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsigncov.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvsigncov_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsigncov_b(_1, _2); } ++// CHECK-LABEL: @xvsigncov_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsigncov.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvsigncov_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsigncov_h(_1, _2); } ++// CHECK-LABEL: @xvsigncov_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsigncov.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvsigncov_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsigncov_w(_1, _2); } ++// CHECK-LABEL: @xvsigncov_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsigncov.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvsigncov_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsigncov_d(_1, _2); } ++// CHECK-LABEL: @xvfmadd_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfmadd.s(<8 x float> _1:%.*, <8 x float> _2:%.*, <8 x float> _3:%.*) ++// CHECK-NEXT: ret <8 x float> TMP0 ++// ++v8f32 xvfmadd_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __builtin_lasx_xvfmadd_s(_1, _2, _3); } ++// CHECK-LABEL: @xvfmadd_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfmadd.d(<4 x double> _1:%.*, <4 x double> _2:%.*, <4 x double> _3:%.*) ++// CHECK-NEXT: ret <4 x double> TMP0 ++// ++v4f64 xvfmadd_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __builtin_lasx_xvfmadd_d(_1, _2, _3); } ++// CHECK-LABEL: @xvfmsub_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfmsub.s(<8 x float> _1:%.*, <8 x float> _2:%.*, <8 x float> _3:%.*) ++// CHECK-NEXT: ret <8 x float> TMP0 ++// ++v8f32 xvfmsub_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __builtin_lasx_xvfmsub_s(_1, _2, _3); } ++// CHECK-LABEL: @xvfmsub_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x double> 
@llvm.loongarch.lasx.xvfmsub.d(<4 x double> _1:%.*, <4 x double> _2:%.*, <4 x double> _3:%.*) ++// CHECK-NEXT: ret <4 x double> TMP0 ++// ++v4f64 xvfmsub_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __builtin_lasx_xvfmsub_d(_1, _2, _3); } ++// CHECK-LABEL: @xvfnmadd_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfnmadd.s(<8 x float> _1:%.*, <8 x float> _2:%.*, <8 x float> _3:%.*) ++// CHECK-NEXT: ret <8 x float> TMP0 ++// ++v8f32 xvfnmadd_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __builtin_lasx_xvfnmadd_s(_1, _2, _3); } ++// CHECK-LABEL: @xvfnmadd_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfnmadd.d(<4 x double> _1:%.*, <4 x double> _2:%.*, <4 x double> _3:%.*) ++// CHECK-NEXT: ret <4 x double> TMP0 ++// ++v4f64 xvfnmadd_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __builtin_lasx_xvfnmadd_d(_1, _2, _3); } ++// CHECK-LABEL: @xvfnmsub_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfnmsub.s(<8 x float> _1:%.*, <8 x float> _2:%.*, <8 x float> _3:%.*) ++// CHECK-NEXT: ret <8 x float> TMP0 ++// ++v8f32 xvfnmsub_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __builtin_lasx_xvfnmsub_s(_1, _2, _3); } ++// CHECK-LABEL: @xvfnmsub_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfnmsub.d(<4 x double> _1:%.*, <4 x double> _2:%.*, <4 x double> _3:%.*) ++// CHECK-NEXT: ret <4 x double> TMP0 ++// ++v4f64 xvfnmsub_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __builtin_lasx_xvfnmsub_d(_1, _2, _3); } ++// CHECK-LABEL: @xvftintrne_w_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.s(<8 x float> _1:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvftintrne_w_s(v8f32 _1) { return __builtin_lasx_xvftintrne_w_s(_1); } ++// CHECK-LABEL: @xvftintrne_l_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrne.l.d(<4 x double> _1:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvftintrne_l_d(v4f64 _1) { return __builtin_lasx_xvftintrne_l_d(_1); } ++// CHECK-LABEL: @xvftintrp_w_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.s(<8 x float> _1:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvftintrp_w_s(v8f32 _1) { return __builtin_lasx_xvftintrp_w_s(_1); } ++// CHECK-LABEL: @xvftintrp_l_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrp.l.d(<4 x double> _1:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvftintrp_l_d(v4f64 _1) { return __builtin_lasx_xvftintrp_l_d(_1); } ++// CHECK-LABEL: @xvftintrm_w_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.s(<8 x float> _1:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvftintrm_w_s(v8f32 _1) { return __builtin_lasx_xvftintrm_w_s(_1); } ++// CHECK-LABEL: @xvftintrm_l_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrm.l.d(<4 x double> _1:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvftintrm_l_d(v4f64 _1) { return __builtin_lasx_xvftintrm_l_d(_1); } ++// CHECK-LABEL: @xvftint_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.w.d(<4 x double> _1:%.*, <4 x double> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvftint_w_d(v4f64 _1, v4f64 _2) { return 
__builtin_lasx_xvftint_w_d(_1, _2); } ++// CHECK-LABEL: @xvffint_s_l( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.l(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <8 x float> TMP0 ++// ++v8f32 xvffint_s_l(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvffint_s_l(_1, _2); } ++// CHECK-LABEL: @xvftintrz_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.d(<4 x double> _1:%.*, <4 x double> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvftintrz_w_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvftintrz_w_d(_1, _2); } ++// CHECK-LABEL: @xvftintrp_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.d(<4 x double> _1:%.*, <4 x double> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvftintrp_w_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvftintrp_w_d(_1, _2); } ++// CHECK-LABEL: @xvftintrm_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.d(<4 x double> _1:%.*, <4 x double> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvftintrm_w_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvftintrm_w_d(_1, _2); } ++// CHECK-LABEL: @xvftintrne_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.d(<4 x double> _1:%.*, <4 x double> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvftintrne_w_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvftintrne_w_d(_1, _2); } ++// CHECK-LABEL: @xvftinth_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftinth.l.s(<8 x float> _1:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvftinth_l_s(v8f32 _1) { return __builtin_lasx_xvftinth_l_s(_1); } ++// CHECK-LABEL: @xvftintl_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftintl.l.s(<8 x float> _1:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvftintl_l_s(v8f32 _1) { return __builtin_lasx_xvftintl_l_s(_1); } ++// CHECK-LABEL: @xvffinth_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvffinth.d.w(<8 x i32> _1:%.*) ++// CHECK-NEXT: ret <4 x double> TMP0 ++// ++v4f64 xvffinth_d_w(v8i32 _1) { return __builtin_lasx_xvffinth_d_w(_1); } ++// CHECK-LABEL: @xvffintl_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvffintl.d.w(<8 x i32> _1:%.*) ++// CHECK-NEXT: ret <4 x double> TMP0 ++// ++v4f64 xvffintl_d_w(v8i32 _1) { return __builtin_lasx_xvffintl_d_w(_1); } ++// CHECK-LABEL: @xvftintrzh_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrzh.l.s(<8 x float> _1:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvftintrzh_l_s(v8f32 _1) { return __builtin_lasx_xvftintrzh_l_s(_1); } ++// CHECK-LABEL: @xvftintrzl_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrzl.l.s(<8 x float> _1:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvftintrzl_l_s(v8f32 _1) { return __builtin_lasx_xvftintrzl_l_s(_1); } ++// CHECK-LABEL: @xvftintrph_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrph.l.s(<8 x float> _1:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvftintrph_l_s(v8f32 _1) { return 
__builtin_lasx_xvftintrph_l_s(_1); } ++// CHECK-LABEL: @xvftintrpl_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrpl.l.s(<8 x float> _1:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvftintrpl_l_s(v8f32 _1) { return __builtin_lasx_xvftintrpl_l_s(_1); } ++// CHECK-LABEL: @xvftintrmh_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrmh.l.s(<8 x float> _1:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvftintrmh_l_s(v8f32 _1) { return __builtin_lasx_xvftintrmh_l_s(_1); } ++// CHECK-LABEL: @xvftintrml_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrml.l.s(<8 x float> _1:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvftintrml_l_s(v8f32 _1) { return __builtin_lasx_xvftintrml_l_s(_1); } ++// CHECK-LABEL: @xvftintrneh_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrneh.l.s(<8 x float> _1:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvftintrneh_l_s(v8f32 _1) { return __builtin_lasx_xvftintrneh_l_s(_1); } ++// CHECK-LABEL: @xvftintrnel_l_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrnel.l.s(<8 x float> _1:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvftintrnel_l_s(v8f32 _1) { return __builtin_lasx_xvftintrnel_l_s(_1); } ++// CHECK-LABEL: @xvfrintrne_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrne.s(<8 x float> _1:%.*) ++// CHECK-NEXT: TMP1:%.* = bitcast <8 x float> TMP0 to <8 x i32> ++// CHECK-NEXT: ret <8 x i32> TMP1 ++// ++v8i32 xvfrintrne_s(v8f32 _1) { return __builtin_lasx_xvfrintrne_s(_1); } ++// CHECK-LABEL: @xvfrintrne_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrne.d(<4 x double> _1:%.*) ++// CHECK-NEXT: TMP1:%.* = bitcast <4 x double> TMP0 to <4 x i64> ++// CHECK-NEXT: ret <4 x i64> TMP1 ++// ++v4i64 xvfrintrne_d(v4f64 _1) { return __builtin_lasx_xvfrintrne_d(_1); } ++// CHECK-LABEL: @xvfrintrz_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrz.s(<8 x float> _1:%.*) ++// CHECK-NEXT: TMP1:%.* = bitcast <8 x float> TMP0 to <8 x i32> ++// CHECK-NEXT: ret <8 x i32> TMP1 ++// ++v8i32 xvfrintrz_s(v8f32 _1) { return __builtin_lasx_xvfrintrz_s(_1); } ++// CHECK-LABEL: @xvfrintrz_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrz.d(<4 x double> _1:%.*) ++// CHECK-NEXT: TMP1:%.* = bitcast <4 x double> TMP0 to <4 x i64> ++// CHECK-NEXT: ret <4 x i64> TMP1 ++// ++v4i64 xvfrintrz_d(v4f64 _1) { return __builtin_lasx_xvfrintrz_d(_1); } ++// CHECK-LABEL: @xvfrintrp_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrp.s(<8 x float> _1:%.*) ++// CHECK-NEXT: TMP1:%.* = bitcast <8 x float> TMP0 to <8 x i32> ++// CHECK-NEXT: ret <8 x i32> TMP1 ++// ++v8i32 xvfrintrp_s(v8f32 _1) { return __builtin_lasx_xvfrintrp_s(_1); } ++// CHECK-LABEL: @xvfrintrp_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrp.d(<4 x double> _1:%.*) ++// CHECK-NEXT: TMP1:%.* = bitcast <4 x double> TMP0 to <4 x i64> ++// CHECK-NEXT: ret <4 x i64> TMP1 ++// ++v4i64 xvfrintrp_d(v4f64 _1) { return __builtin_lasx_xvfrintrp_d(_1); } ++// CHECK-LABEL: @xvfrintrm_s( ++// 
CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrm.s(<8 x float> _1:%.*) ++// CHECK-NEXT: TMP1:%.* = bitcast <8 x float> TMP0 to <8 x i32> ++// CHECK-NEXT: ret <8 x i32> TMP1 ++// ++v8i32 xvfrintrm_s(v8f32 _1) { return __builtin_lasx_xvfrintrm_s(_1); } ++// CHECK-LABEL: @xvfrintrm_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrm.d(<4 x double> _1:%.*) ++// CHECK-NEXT: TMP1:%.* = bitcast <4 x double> TMP0 to <4 x i64> ++// CHECK-NEXT: ret <4 x i64> TMP1 ++// ++v4i64 xvfrintrm_d(v4f64 _1) { return __builtin_lasx_xvfrintrm_d(_1); } ++// CHECK-LABEL: @xvld( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvld(ptr _1:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvld(void *_1) { return __builtin_lasx_xvld(_1, 1); } ++// CHECK-LABEL: @xvst( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvst(<32 x i8> _1:%.*, ptr _2:%.*, i32 1) ++// CHECK-NEXT: ret void ++// ++void xvst(v32i8 _1, void *_2) { return __builtin_lasx_xvst(_1, _2, 1); } ++// CHECK-LABEL: @xvstelm_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.b(<32 x i8> _1:%.*, ptr _2:%.*, i32 1, i32 1) ++// CHECK-NEXT: ret void ++// ++void xvstelm_b(v32i8 _1, void * _2) { return __builtin_lasx_xvstelm_b(_1, _2, 1, 1); } ++// CHECK-LABEL: @xvstelm_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.h(<16 x i16> _1:%.*, ptr _2:%.*, i32 2, i32 1) ++// CHECK-NEXT: ret void ++// ++void xvstelm_h(v16i16 _1, void * _2) { return __builtin_lasx_xvstelm_h(_1, _2, 2, 1); } ++// CHECK-LABEL: @xvstelm_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.w(<8 x i32> _1:%.*, ptr _2:%.*, i32 4, i32 1) ++// CHECK-NEXT: ret void ++// ++void xvstelm_w(v8i32 _1, void * _2) { return __builtin_lasx_xvstelm_w(_1, _2, 4, 1); } ++// CHECK-LABEL: @xvstelm_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.d(<4 x i64> _1:%.*, ptr _2:%.*, i32 8, i32 1) ++// CHECK-NEXT: ret void ++// ++void xvstelm_d(v4i64 _1, void * _2) { return __builtin_lasx_xvstelm_d(_1, _2, 8, 1); } ++// CHECK-LABEL: @xvinsve0_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvinsve0.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvinsve0_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvinsve0_w(_1, _2, 1); } ++// CHECK-LABEL: @xvinsve0_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvinsve0.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvinsve0_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvinsve0_d(_1, _2, 1); } ++// CHECK-LABEL: @xvpickve_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvpickve.w(<8 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvpickve_w(v8i32 _1) { return __builtin_lasx_xvpickve_w(_1, 1); } ++// CHECK-LABEL: @xvpickve_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvpickve.d(<4 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvpickve_d(v4i64 _1) { return __builtin_lasx_xvpickve_d(_1, 1); } ++// CHECK-LABEL: @xvssrlrn_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> 
@llvm.loongarch.lasx.xvssrlrn.b.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvssrlrn_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrlrn_b_h(_1, _2); } ++// CHECK-LABEL: @xvssrlrn_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.h.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvssrlrn_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrlrn_h_w(_1, _2); } ++// CHECK-LABEL: @xvssrlrn_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.w.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvssrlrn_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrlrn_w_d(_1, _2); } ++// CHECK-LABEL: @xvssrln_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssrln.b.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvssrln_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrln_b_h(_1, _2); } ++// CHECK-LABEL: @xvssrln_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssrln.h.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvssrln_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrln_h_w(_1, _2); } ++// CHECK-LABEL: @xvssrln_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssrln.w.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvssrln_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrln_w_d(_1, _2); } ++// CHECK-LABEL: @xvorn_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvorn.v(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvorn_v(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvorn_v(_1, _2); } ++// CHECK-LABEL: @xvldi( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvldi(i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvldi() { return __builtin_lasx_xvldi(1); } ++// CHECK-LABEL: @xvldx( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvldx(ptr _1:%.*, i64 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvldx(void *_1) { return __builtin_lasx_xvldx(_1, 1); } ++// CHECK-LABEL: @xvstx( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstx(<32 x i8> _1:%.*, ptr _2:%.*, i64 1) ++// CHECK-NEXT: ret void ++// ++void xvstx(v32i8 _1, void *_2) { return __builtin_lasx_xvstx(_1, _2, 1); } ++// CHECK-LABEL: @xvextl_qu_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvextl.qu.du(<4 x i64> _1:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4u64 xvextl_qu_du(v4u64 _1) { return __builtin_lasx_xvextl_qu_du(_1); } ++// CHECK-LABEL: @xvinsgr2vr_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w(<8 x i32> _1:%.*, i32 1, i32 1) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvinsgr2vr_w(v8i32 _1) { return __builtin_lasx_xvinsgr2vr_w(_1, 1, 1); } ++// CHECK-LABEL: @xvinsgr2vr_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64> _1:%.*, i64 1, i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvinsgr2vr_d(v4i64 _1) { 
return __builtin_lasx_xvinsgr2vr_d(_1, 1, 1); } ++// CHECK-LABEL: @xvreplve0_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve0.b(<32 x i8> _1:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvreplve0_b(v32i8 _1) { return __builtin_lasx_xvreplve0_b(_1); } ++// CHECK-LABEL: @xvreplve0_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvreplve0.h(<16 x i16> _1:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvreplve0_h(v16i16 _1) { return __builtin_lasx_xvreplve0_h(_1); } ++// CHECK-LABEL: @xvreplve0_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvreplve0.w(<8 x i32> _1:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvreplve0_w(v8i32 _1) { return __builtin_lasx_xvreplve0_w(_1); } ++// CHECK-LABEL: @xvreplve0_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvreplve0.d(<4 x i64> _1:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvreplve0_d(v4i64 _1) { return __builtin_lasx_xvreplve0_d(_1); } ++// CHECK-LABEL: @xvreplve0_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve0.q(<32 x i8> _1:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvreplve0_q(v32i8 _1) { return __builtin_lasx_xvreplve0_q(_1); } ++// CHECK-LABEL: @vext2xv_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.vext2xv.h.b(<32 x i8> _1:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 vext2xv_h_b(v32i8 _1) { return __builtin_lasx_vext2xv_h_b(_1); } ++// CHECK-LABEL: @vext2xv_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.h(<16 x i16> _1:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 vext2xv_w_h(v16i16 _1) { return __builtin_lasx_vext2xv_w_h(_1); } ++// CHECK-LABEL: @vext2xv_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.w(<8 x i32> _1:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 vext2xv_d_w(v8i32 _1) { return __builtin_lasx_vext2xv_d_w(_1); } ++// CHECK-LABEL: @vext2xv_w_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.b(<32 x i8> _1:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 vext2xv_w_b(v32i8 _1) { return __builtin_lasx_vext2xv_w_b(_1); } ++// CHECK-LABEL: @vext2xv_d_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.h(<16 x i16> _1:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 vext2xv_d_h(v16i16 _1) { return __builtin_lasx_vext2xv_d_h(_1); } ++// CHECK-LABEL: @vext2xv_d_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.b(<32 x i8> _1:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 vext2xv_d_b(v32i8 _1) { return __builtin_lasx_vext2xv_d_b(_1); } ++// CHECK-LABEL: @vext2xv_hu_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.vext2xv.hu.bu(<32 x i8> _1:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 vext2xv_hu_bu(v32i8 _1) { return __builtin_lasx_vext2xv_hu_bu(_1); } ++// CHECK-LABEL: @vext2xv_wu_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.hu(<16 x i16> _1:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 vext2xv_wu_hu(v16i16 _1) { return 
__builtin_lasx_vext2xv_wu_hu(_1); } ++// CHECK-LABEL: @vext2xv_du_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.wu(<8 x i32> _1:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 vext2xv_du_wu(v8i32 _1) { return __builtin_lasx_vext2xv_du_wu(_1); } ++// CHECK-LABEL: @vext2xv_wu_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.bu(<32 x i8> _1:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 vext2xv_wu_bu(v32i8 _1) { return __builtin_lasx_vext2xv_wu_bu(_1); } ++// CHECK-LABEL: @vext2xv_du_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.hu(<16 x i16> _1:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 vext2xv_du_hu(v16i16 _1) { return __builtin_lasx_vext2xv_du_hu(_1); } ++// CHECK-LABEL: @vext2xv_du_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.bu(<32 x i8> _1:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 vext2xv_du_bu(v32i8 _1) { return __builtin_lasx_vext2xv_du_bu(_1); } ++// CHECK-LABEL: @xvpermi_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvpermi.q(<32 x i8> _1:%.*, <32 x i8> _2:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvpermi_q(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvpermi_q(_1, _2, 1); } ++// CHECK-LABEL: @xvpermi_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvpermi.d(<4 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvpermi_d(v4i64 _1) { return __builtin_lasx_xvpermi_d(_1, 1); } ++// CHECK-LABEL: @xvperm_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvperm.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvperm_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvperm_w(_1, _2); } ++// CHECK-LABEL: @xvldrepl_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(ptr _1:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvldrepl_b(void *_1) { return __builtin_lasx_xvldrepl_b(_1, 1); } ++// CHECK-LABEL: @xvldrepl_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvldrepl.h(ptr _1:%.*, i32 2) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvldrepl_h(void *_1) { return __builtin_lasx_xvldrepl_h(_1, 2); } ++// CHECK-LABEL: @xvldrepl_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvldrepl.w(ptr _1:%.*, i32 4) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvldrepl_w(void *_1) { return __builtin_lasx_xvldrepl_w(_1, 4); } ++// CHECK-LABEL: @xvldrepl_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvldrepl.d(ptr _1:%.*, i32 8) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvldrepl_d(void *_1) { return __builtin_lasx_xvldrepl_d(_1, 8); } ++// CHECK-LABEL: @xvpickve2gr_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lasx.xvpickve2gr.w(<8 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret i32 TMP0 ++// ++int xvpickve2gr_w(v8i32 _1) { return __builtin_lasx_xvpickve2gr_w(_1, 1); } ++// CHECK-LABEL: @xvpickve2gr_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lasx.xvpickve2gr.wu(<8 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret i32 TMP0 
++// ++unsigned int xvpickve2gr_wu(v8i32 _1) { return __builtin_lasx_xvpickve2gr_wu(_1, 1); } ++// CHECK-LABEL: @xvpickve2gr_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret i64 TMP0 ++// ++long xvpickve2gr_d(v4i64 _1) { return __builtin_lasx_xvpickve2gr_d(_1, 1); } ++// CHECK-LABEL: @xvpickve2gr_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret i64 TMP0 ++// ++unsigned long int xvpickve2gr_du(v4i64 _1) { return __builtin_lasx_xvpickve2gr_du(_1, 1); } ++// CHECK-LABEL: @xvaddwev_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvaddwev_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvaddwev_q_d(_1, _2); } ++// CHECK-LABEL: @xvaddwev_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvaddwev_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvaddwev_d_w(_1, _2); } ++// CHECK-LABEL: @xvaddwev_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvaddwev_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvaddwev_w_h(_1, _2); } ++// CHECK-LABEL: @xvaddwev_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvaddwev_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvaddwev_h_b(_1, _2); } ++// CHECK-LABEL: @xvaddwev_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvaddwev_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvaddwev_q_du(_1, _2); } ++// CHECK-LABEL: @xvaddwev_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvaddwev_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvaddwev_d_wu(_1, _2); } ++// CHECK-LABEL: @xvaddwev_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvaddwev_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvaddwev_w_hu(_1, _2); } ++// CHECK-LABEL: @xvaddwev_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvaddwev_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvaddwev_h_bu(_1, _2); } ++// CHECK-LABEL: @xvsubwev_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvsubwev_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsubwev_q_d(_1, _2); } ++// CHECK-LABEL: @xvsubwev_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> 
@llvm.loongarch.lasx.xvsubwev.d.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvsubwev_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsubwev_d_w(_1, _2); } ++// CHECK-LABEL: @xvsubwev_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvsubwev_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsubwev_w_h(_1, _2); } ++// CHECK-LABEL: @xvsubwev_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvsubwev_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsubwev_h_b(_1, _2); } ++// CHECK-LABEL: @xvsubwev_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvsubwev_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvsubwev_q_du(_1, _2); } ++// CHECK-LABEL: @xvsubwev_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.wu(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvsubwev_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvsubwev_d_wu(_1, _2); } ++// CHECK-LABEL: @xvsubwev_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.hu(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvsubwev_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvsubwev_w_hu(_1, _2); } ++// CHECK-LABEL: @xvsubwev_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.bu(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvsubwev_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvsubwev_h_bu(_1, _2); } ++// CHECK-LABEL: @xvmulwev_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvmulwev_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmulwev_q_d(_1, _2); } ++// CHECK-LABEL: @xvmulwev_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvmulwev_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmulwev_d_w(_1, _2); } ++// CHECK-LABEL: @xvmulwev_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvmulwev_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmulwev_w_h(_1, _2); } ++// CHECK-LABEL: @xvmulwev_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvmulwev_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmulwev_h_b(_1, _2); } ++// CHECK-LABEL: @xvmulwev_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvmulwev_q_du(v4u64 _1, v4u64 _2) { 
return __builtin_lasx_xvmulwev_q_du(_1, _2); } ++// CHECK-LABEL: @xvmulwev_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvmulwev_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmulwev_d_wu(_1, _2); } ++// CHECK-LABEL: @xvmulwev_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvmulwev_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmulwev_w_hu(_1, _2); } ++// CHECK-LABEL: @xvmulwev_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvmulwev_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmulwev_h_bu(_1, _2); } ++// CHECK-LABEL: @xvaddwod_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvaddwod_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvaddwod_q_d(_1, _2); } ++// CHECK-LABEL: @xvaddwod_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvaddwod_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvaddwod_d_w(_1, _2); } ++// CHECK-LABEL: @xvaddwod_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvaddwod_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvaddwod_w_h(_1, _2); } ++// CHECK-LABEL: @xvaddwod_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvaddwod_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvaddwod_h_b(_1, _2); } ++// CHECK-LABEL: @xvaddwod_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvaddwod_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvaddwod_q_du(_1, _2); } ++// CHECK-LABEL: @xvaddwod_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvaddwod_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvaddwod_d_wu(_1, _2); } ++// CHECK-LABEL: @xvaddwod_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvaddwod_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvaddwod_w_hu(_1, _2); } ++// CHECK-LABEL: @xvaddwod_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvaddwod_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvaddwod_h_bu(_1, _2); } ++// CHECK-LABEL: @xvsubwod_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x 
i64> @llvm.loongarch.lasx.xvsubwod.q.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvsubwod_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsubwod_q_d(_1, _2); } ++// CHECK-LABEL: @xvsubwod_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvsubwod_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsubwod_d_w(_1, _2); } ++// CHECK-LABEL: @xvsubwod_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvsubwod_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsubwod_w_h(_1, _2); } ++// CHECK-LABEL: @xvsubwod_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvsubwod_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsubwod_h_b(_1, _2); } ++// CHECK-LABEL: @xvsubwod_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvsubwod_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvsubwod_q_du(_1, _2); } ++// CHECK-LABEL: @xvsubwod_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.wu(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvsubwod_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvsubwod_d_wu(_1, _2); } ++// CHECK-LABEL: @xvsubwod_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.hu(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvsubwod_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvsubwod_w_hu(_1, _2); } ++// CHECK-LABEL: @xvsubwod_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.bu(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvsubwod_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvsubwod_h_bu(_1, _2); } ++// CHECK-LABEL: @xvmulwod_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvmulwod_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmulwod_q_d(_1, _2); } ++// CHECK-LABEL: @xvmulwod_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvmulwod_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmulwod_d_w(_1, _2); } ++// CHECK-LABEL: @xvmulwod_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvmulwod_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmulwod_w_h(_1, _2); } ++// CHECK-LABEL: @xvmulwod_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvmulwod_h_b(v32i8 _1, v32i8 _2) { 
return __builtin_lasx_xvmulwod_h_b(_1, _2); } ++// CHECK-LABEL: @xvmulwod_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvmulwod_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmulwod_q_du(_1, _2); } ++// CHECK-LABEL: @xvmulwod_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvmulwod_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmulwod_d_wu(_1, _2); } ++// CHECK-LABEL: @xvmulwod_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvmulwod_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmulwod_w_hu(_1, _2); } ++// CHECK-LABEL: @xvmulwod_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvmulwod_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmulwod_h_bu(_1, _2); } ++// CHECK-LABEL: @xvaddwev_d_wu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvaddwev_d_wu_w(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvaddwev_d_wu_w(_1, _2); } ++// CHECK-LABEL: @xvaddwev_w_hu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvaddwev_w_hu_h(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvaddwev_w_hu_h(_1, _2); } ++// CHECK-LABEL: @xvaddwev_h_bu_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvaddwev_h_bu_b(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvaddwev_h_bu_b(_1, _2); } ++// CHECK-LABEL: @xvmulwev_d_wu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvmulwev_d_wu_w(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvmulwev_d_wu_w(_1, _2); } ++// CHECK-LABEL: @xvmulwev_w_hu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvmulwev_w_hu_h(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvmulwev_w_hu_h(_1, _2); } ++// CHECK-LABEL: @xvmulwev_h_bu_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvmulwev_h_bu_b(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvmulwev_h_bu_b(_1, _2); } ++// CHECK-LABEL: @xvaddwod_d_wu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvaddwod_d_wu_w(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvaddwod_d_wu_w(_1, _2); } ++// CHECK-LABEL: 
@xvaddwod_w_hu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvaddwod_w_hu_h(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvaddwod_w_hu_h(_1, _2); } ++// CHECK-LABEL: @xvaddwod_h_bu_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvaddwod_h_bu_b(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvaddwod_h_bu_b(_1, _2); } ++// CHECK-LABEL: @xvmulwod_d_wu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvmulwod_d_wu_w(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvmulwod_d_wu_w(_1, _2); } ++// CHECK-LABEL: @xvmulwod_w_hu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvmulwod_w_hu_h(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvmulwod_w_hu_h(_1, _2); } ++// CHECK-LABEL: @xvmulwod_h_bu_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvmulwod_h_bu_b(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvmulwod_h_bu_b(_1, _2); } ++// CHECK-LABEL: @xvhaddw_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.q.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvhaddw_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvhaddw_q_d(_1, _2); } ++// CHECK-LABEL: @xvhaddw_qu_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.qu.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4u64 xvhaddw_qu_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvhaddw_qu_du(_1, _2); } ++// CHECK-LABEL: @xvhsubw_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.q.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvhsubw_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvhsubw_q_d(_1, _2); } ++// CHECK-LABEL: @xvhsubw_qu_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.qu.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4u64 xvhsubw_qu_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvhsubw_qu_du(_1, _2); } ++// CHECK-LABEL: @xvmaddwev_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*, <4 x i64> _3:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvmaddwev_q_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __builtin_lasx_xvmaddwev_q_d(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwev_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.w(<4 x i64> _1:%.*, <8 x i32> _2:%.*, <8 x i32> _3:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvmaddwev_d_w(v4i64 _1, v8i32 _2, v8i32 _3) { return __builtin_lasx_xvmaddwev_d_w(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwev_w_h( ++// CHECK-NEXT: entry: ++// 
CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.h(<8 x i32> _1:%.*, <16 x i16> _2:%.*, <16 x i16> _3:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvmaddwev_w_h(v8i32 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvmaddwev_w_h(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwev_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.b(<16 x i16> _1:%.*, <32 x i8> _2:%.*, <32 x i8> _3:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvmaddwev_h_b(v16i16 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvmaddwev_h_b(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwev_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*, <4 x i64> _3:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4u64 xvmaddwev_q_du(v4u64 _1, v4u64 _2, v4u64 _3) { return __builtin_lasx_xvmaddwev_q_du(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwev_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu(<4 x i64> _1:%.*, <8 x i32> _2:%.*, <8 x i32> _3:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4u64 xvmaddwev_d_wu(v4u64 _1, v8u32 _2, v8u32 _3) { return __builtin_lasx_xvmaddwev_d_wu(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwev_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu(<8 x i32> _1:%.*, <16 x i16> _2:%.*, <16 x i16> _3:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8u32 xvmaddwev_w_hu(v8u32 _1, v16u16 _2, v16u16 _3) { return __builtin_lasx_xvmaddwev_w_hu(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwev_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu(<16 x i16> _1:%.*, <32 x i8> _2:%.*, <32 x i8> _3:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16u16 xvmaddwev_h_bu(v16u16 _1, v32u8 _2, v32u8 _3) { return __builtin_lasx_xvmaddwev_h_bu(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwod_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*, <4 x i64> _3:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvmaddwod_q_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __builtin_lasx_xvmaddwod_q_d(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwod_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.w(<4 x i64> _1:%.*, <8 x i32> _2:%.*, <8 x i32> _3:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvmaddwod_d_w(v4i64 _1, v8i32 _2, v8i32 _3) { return __builtin_lasx_xvmaddwod_d_w(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwod_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.h(<8 x i32> _1:%.*, <16 x i16> _2:%.*, <16 x i16> _3:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvmaddwod_w_h(v8i32 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvmaddwod_w_h(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwod_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.b(<16 x i16> _1:%.*, <32 x i8> _2:%.*, <32 x i8> _3:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvmaddwod_h_b(v16i16 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvmaddwod_h_b(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwod_q_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du(<4 x i64> _1:%.*, <4 
x i64> _2:%.*, <4 x i64> _3:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4u64 xvmaddwod_q_du(v4u64 _1, v4u64 _2, v4u64 _3) { return __builtin_lasx_xvmaddwod_q_du(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwod_d_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu(<4 x i64> _1:%.*, <8 x i32> _2:%.*, <8 x i32> _3:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4u64 xvmaddwod_d_wu(v4u64 _1, v8u32 _2, v8u32 _3) { return __builtin_lasx_xvmaddwod_d_wu(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwod_w_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu(<8 x i32> _1:%.*, <16 x i16> _2:%.*, <16 x i16> _3:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8u32 xvmaddwod_w_hu(v8u32 _1, v16u16 _2, v16u16 _3) { return __builtin_lasx_xvmaddwod_w_hu(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwod_h_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu(<16 x i16> _1:%.*, <32 x i8> _2:%.*, <32 x i8> _3:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16u16 xvmaddwod_h_bu(v16u16 _1, v32u8 _2, v32u8 _3) { return __builtin_lasx_xvmaddwod_h_bu(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwev_q_du_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*, <4 x i64> _3:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvmaddwev_q_du_d(v4i64 _1, v4u64 _2, v4i64 _3) { return __builtin_lasx_xvmaddwev_q_du_d(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwev_d_wu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu.w(<4 x i64> _1:%.*, <8 x i32> _2:%.*, <8 x i32> _3:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvmaddwev_d_wu_w(v4i64 _1, v8u32 _2, v8i32 _3) { return __builtin_lasx_xvmaddwev_d_wu_w(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwev_w_hu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu.h(<8 x i32> _1:%.*, <16 x i16> _2:%.*, <16 x i16> _3:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvmaddwev_w_hu_h(v8i32 _1, v16u16 _2, v16i16 _3) { return __builtin_lasx_xvmaddwev_w_hu_h(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwev_h_bu_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu.b(<16 x i16> _1:%.*, <32 x i8> _2:%.*, <32 x i8> _3:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvmaddwev_h_bu_b(v16i16 _1, v32u8 _2, v32i8 _3) { return __builtin_lasx_xvmaddwev_h_bu_b(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwod_q_du_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*, <4 x i64> _3:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvmaddwod_q_du_d(v4i64 _1, v4u64 _2, v4i64 _3) { return __builtin_lasx_xvmaddwod_q_du_d(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwod_d_wu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu.w(<4 x i64> _1:%.*, <8 x i32> _2:%.*, <8 x i32> _3:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvmaddwod_d_wu_w(v4i64 _1, v8u32 _2, v8i32 _3) { return __builtin_lasx_xvmaddwod_d_wu_w(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwod_w_hu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu.h(<8 x i32> _1:%.*, <16 x i16> _2:%.*, <16 x i16> _3:%.*) 
++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvmaddwod_w_hu_h(v8i32 _1, v16u16 _2, v16i16 _3) { return __builtin_lasx_xvmaddwod_w_hu_h(_1, _2, _3); } ++// CHECK-LABEL: @xvmaddwod_h_bu_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu.b(<16 x i16> _1:%.*, <32 x i8> _2:%.*, <32 x i8> _3:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvmaddwod_h_bu_b(v16i16 _1, v32u8 _2, v32i8 _3) { return __builtin_lasx_xvmaddwod_h_bu_b(_1, _2, _3); } ++// CHECK-LABEL: @xvrotr_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvrotr.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvrotr_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvrotr_b(_1, _2); } ++// CHECK-LABEL: @xvrotr_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvrotr.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvrotr_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvrotr_h(_1, _2); } ++// CHECK-LABEL: @xvrotr_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvrotr.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvrotr_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvrotr_w(_1, _2); } ++// CHECK-LABEL: @xvrotr_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvrotr.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvrotr_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvrotr_d(_1, _2); } ++// CHECK-LABEL: @xvadd_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvadd.q(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvadd_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvadd_q(_1, _2); } ++// CHECK-LABEL: @xvsub_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsub.q(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvsub_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsub_q(_1, _2); } ++// CHECK-LABEL: @xvaddwev_q_du_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvaddwev_q_du_d(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvaddwev_q_du_d(_1, _2); } ++// CHECK-LABEL: @xvaddwod_q_du_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvaddwod_q_du_d(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvaddwod_q_du_d(_1, _2); } ++// CHECK-LABEL: @xvmulwev_q_du_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvmulwev_q_du_d(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvmulwev_q_du_d(_1, _2); } ++// CHECK-LABEL: @xvmulwod_q_du_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvmulwod_q_du_d(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvmulwod_q_du_d(_1, _2); } ++// CHECK-LABEL: @xvmskgez_b( ++// 
CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmskgez.b(<32 x i8> _1:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvmskgez_b(v32i8 _1) { return __builtin_lasx_xvmskgez_b(_1); } ++// CHECK-LABEL: @xvmsknz_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmsknz.b(<32 x i8> _1:%.*) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvmsknz_b(v32i8 _1) { return __builtin_lasx_xvmsknz_b(_1); } ++// CHECK-LABEL: @xvexth_h_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvexth.h.b(<32 x i8> _1:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvexth_h_b(v32i8 _1) { return __builtin_lasx_xvexth_h_b(_1); } ++// CHECK-LABEL: @xvexth_w_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvexth.w.h(<16 x i16> _1:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvexth_w_h(v16i16 _1) { return __builtin_lasx_xvexth_w_h(_1); } ++// CHECK-LABEL: @xvexth_d_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.d.w(<8 x i32> _1:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvexth_d_w(v8i32 _1) { return __builtin_lasx_xvexth_d_w(_1); } ++// CHECK-LABEL: @xvexth_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.q.d(<4 x i64> _1:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvexth_q_d(v4i64 _1) { return __builtin_lasx_xvexth_q_d(_1); } ++// CHECK-LABEL: @xvexth_hu_bu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvexth.hu.bu(<32 x i8> _1:%.*) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16u16 xvexth_hu_bu(v32u8 _1) { return __builtin_lasx_xvexth_hu_bu(_1); } ++// CHECK-LABEL: @xvexth_wu_hu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvexth.wu.hu(<16 x i16> _1:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8u32 xvexth_wu_hu(v16u16 _1) { return __builtin_lasx_xvexth_wu_hu(_1); } ++// CHECK-LABEL: @xvexth_du_wu( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.du.wu(<8 x i32> _1:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4u64 xvexth_du_wu(v8u32 _1) { return __builtin_lasx_xvexth_du_wu(_1); } ++// CHECK-LABEL: @xvexth_qu_du( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.qu.du(<4 x i64> _1:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4u64 xvexth_qu_du(v4u64 _1) { return __builtin_lasx_xvexth_qu_du(_1); } ++// CHECK-LABEL: @xvrotri_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvrotri.b(<32 x i8> _1:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvrotri_b(v32i8 _1) { return __builtin_lasx_xvrotri_b(_1, 1); } ++// CHECK-LABEL: @xvrotri_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvrotri.h(<16 x i16> _1:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvrotri_h(v16i16 _1) { return __builtin_lasx_xvrotri_h(_1, 1); } ++// CHECK-LABEL: @xvrotri_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvrotri.w(<8 x i32> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvrotri_w(v8i32 _1) { return __builtin_lasx_xvrotri_w(_1, 1); } ++// CHECK-LABEL: @xvrotri_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = 
tail call <4 x i64> @llvm.loongarch.lasx.xvrotri.d(<4 x i64> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvrotri_d(v4i64 _1) { return __builtin_lasx_xvrotri_d(_1, 1); } ++// CHECK-LABEL: @xvextl_q_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvextl.q.d(<4 x i64> _1:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvextl_q_d(v4i64 _1) { return __builtin_lasx_xvextl_q_d(_1); } ++// CHECK-LABEL: @xvsrlni_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h(<32 x i8> _1:%.*, <32 x i8> _2:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvsrlni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrlni_b_h(_1, _2, 1); } ++// CHECK-LABEL: @xvsrlni_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w(<16 x i16> _1:%.*, <16 x i16> _2:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvsrlni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrlni_h_w(_1, _2, 1); } ++// CHECK-LABEL: @xvsrlni_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d(<8 x i32> _1:%.*, <8 x i32> _2:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvsrlni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrlni_w_d(_1, _2, 1); } ++// CHECK-LABEL: @xvsrlni_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q(<4 x i64> _1:%.*, <4 x i64> _2:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvsrlni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrlni_d_q(_1, _2, 1); } ++// CHECK-LABEL: @xvsrlrni_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h(<32 x i8> _1:%.*, <32 x i8> _2:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvsrlrni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrlrni_b_h(_1, _2, 1); } ++// CHECK-LABEL: @xvsrlrni_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w(<16 x i16> _1:%.*, <16 x i16> _2:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvsrlrni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrlrni_h_w(_1, _2, 1); } ++// CHECK-LABEL: @xvsrlrni_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d(<8 x i32> _1:%.*, <8 x i32> _2:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvsrlrni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrlrni_w_d(_1, _2, 1); } ++// CHECK-LABEL: @xvsrlrni_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q(<4 x i64> _1:%.*, <4 x i64> _2:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvsrlrni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrlrni_d_q(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlni_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8> _1:%.*, <32 x i8> _2:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvssrlni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvssrlni_b_h(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlni_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w(<16 x i16> _1:%.*, <16 x i16> _2:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvssrlni_h_w(v16i16 _1, v16i16 
_2) { return __builtin_lasx_xvssrlni_h_w(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlni_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d(<8 x i32> _1:%.*, <8 x i32> _2:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvssrlni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrlni_w_d(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlni_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q(<4 x i64> _1:%.*, <4 x i64> _2:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvssrlni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrlni_d_q(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlni_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h(<32 x i8> _1:%.*, <32 x i8> _2:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32u8 xvssrlni_bu_h(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvssrlni_bu_h(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlni_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w(<16 x i16> _1:%.*, <16 x i16> _2:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16u16 xvssrlni_hu_w(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvssrlni_hu_w(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlni_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d(<8 x i32> _1:%.*, <8 x i32> _2:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8u32 xvssrlni_wu_d(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvssrlni_wu_d(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlni_du_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q(<4 x i64> _1:%.*, <4 x i64> _2:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4u64 xvssrlni_du_q(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvssrlni_du_q(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlrni_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h(<32 x i8> _1:%.*, <32 x i8> _2:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvssrlrni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvssrlrni_b_h(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlrni_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w(<16 x i16> _1:%.*, <16 x i16> _2:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvssrlrni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrlrni_h_w(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlrni_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d(<8 x i32> _1:%.*, <8 x i32> _2:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvssrlrni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrlrni_w_d(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlrni_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.d.q(<4 x i64> _1:%.*, <4 x i64> _2:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvssrlrni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrlrni_d_q(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlrni_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h(<32 x i8> _1:%.*, <32 x i8> _2:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32u8 xvssrlrni_bu_h(v32u8 _1, v32i8 _2) { return 
__builtin_lasx_xvssrlrni_bu_h(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlrni_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w(<16 x i16> _1:%.*, <16 x i16> _2:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16u16 xvssrlrni_hu_w(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvssrlrni_hu_w(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlrni_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d(<8 x i32> _1:%.*, <8 x i32> _2:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8u32 xvssrlrni_wu_d(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvssrlrni_wu_d(_1, _2, 1); } ++// CHECK-LABEL: @xvssrlrni_du_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q(<4 x i64> _1:%.*, <4 x i64> _2:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4u64 xvssrlrni_du_q(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvssrlrni_du_q(_1, _2, 1); } ++// CHECK-LABEL: @xvsrani_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h(<32 x i8> _1:%.*, <32 x i8> _2:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvsrani_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrani_b_h(_1, _2, 1); } ++// CHECK-LABEL: @xvsrani_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w(<16 x i16> _1:%.*, <16 x i16> _2:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvsrani_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrani_h_w(_1, _2, 1); } ++// CHECK-LABEL: @xvsrani_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d(<8 x i32> _1:%.*, <8 x i32> _2:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvsrani_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrani_w_d(_1, _2, 1); } ++// CHECK-LABEL: @xvsrani_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q(<4 x i64> _1:%.*, <4 x i64> _2:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvsrani_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrani_d_q(_1, _2, 1); } ++// CHECK-LABEL: @xvsrarni_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h(<32 x i8> _1:%.*, <32 x i8> _2:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvsrarni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrarni_b_h(_1, _2, 1); } ++// CHECK-LABEL: @xvsrarni_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w(<16 x i16> _1:%.*, <16 x i16> _2:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvsrarni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrarni_h_w(_1, _2, 1); } ++// CHECK-LABEL: @xvsrarni_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d(<8 x i32> _1:%.*, <8 x i32> _2:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvsrarni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrarni_w_d(_1, _2, 1); } ++// CHECK-LABEL: @xvsrarni_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q(<4 x i64> _1:%.*, <4 x i64> _2:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvsrarni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrarni_d_q(_1, _2, 1); } ++// 
CHECK-LABEL: @xvssrani_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h(<32 x i8> _1:%.*, <32 x i8> _2:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvssrani_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvssrani_b_h(_1, _2, 1); } ++// CHECK-LABEL: @xvssrani_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssrani.h.w(<16 x i16> _1:%.*, <16 x i16> _2:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvssrani_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrani_h_w(_1, _2, 1); } ++// CHECK-LABEL: @xvssrani_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d(<8 x i32> _1:%.*, <8 x i32> _2:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvssrani_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrani_w_d(_1, _2, 1); } ++// CHECK-LABEL: @xvssrani_d_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q(<4 x i64> _1:%.*, <4 x i64> _2:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvssrani_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrani_d_q(_1, _2, 1); } ++// CHECK-LABEL: @xvssrani_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h(<32 x i8> _1:%.*, <32 x i8> _2:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32u8 xvssrani_bu_h(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvssrani_bu_h(_1, _2, 1); } ++// CHECK-LABEL: @xvssrani_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w(<16 x i16> _1:%.*, <16 x i16> _2:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16u16 xvssrani_hu_w(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvssrani_hu_w(_1, _2, 1); } ++// CHECK-LABEL: @xvssrani_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d(<8 x i32> _1:%.*, <8 x i32> _2:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8u32 xvssrani_wu_d(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvssrani_wu_d(_1, _2, 1); } ++// CHECK-LABEL: @xvssrani_du_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvssrani.du.q(<4 x i64> _1:%.*, <4 x i64> _2:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4u64 xvssrani_du_q(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvssrani_du_q(_1, _2, 1); } ++// CHECK-LABEL: @xvssrarni_b_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h(<32 x i8> _1:%.*, <32 x i8> _2:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvssrarni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvssrarni_b_h(_1, _2, 1); } ++// CHECK-LABEL: @xvssrarni_h_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w(<16 x i16> _1:%.*, <16 x i16> _2:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvssrarni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrarni_h_w(_1, _2, 1); } ++// CHECK-LABEL: @xvssrarni_w_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarni.w.d(<8 x i32> _1:%.*, <8 x i32> _2:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvssrarni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrarni_w_d(_1, _2, 1); } ++// CHECK-LABEL: @xvssrarni_d_q( ++// 
CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q(<4 x i64> _1:%.*, <4 x i64> _2:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvssrarni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrarni_d_q(_1, _2, 1); } ++// CHECK-LABEL: @xvssrarni_bu_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h(<32 x i8> _1:%.*, <32 x i8> _2:%.*, i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32u8 xvssrarni_bu_h(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvssrarni_bu_h(_1, _2, 1); } ++// CHECK-LABEL: @xvssrarni_hu_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w(<16 x i16> _1:%.*, <16 x i16> _2:%.*, i32 1) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16u16 xvssrarni_hu_w(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvssrarni_hu_w(_1, _2, 1); } ++// CHECK-LABEL: @xvssrarni_wu_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d(<8 x i32> _1:%.*, <8 x i32> _2:%.*, i32 1) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8u32 xvssrarni_wu_d(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvssrarni_wu_d(_1, _2, 1); } ++// CHECK-LABEL: @xvssrarni_du_q( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q(<4 x i64> _1:%.*, <4 x i64> _2:%.*, i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4u64 xvssrarni_du_q(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvssrarni_du_q(_1, _2, 1); } ++// CHECK-LABEL: @xbnz_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lasx.xbnz.b(<32 x i8> _1:%.*) ++// CHECK-NEXT: ret i32 TMP0 ++// ++int xbnz_b(v32u8 _1) { return __builtin_lasx_xbnz_b(_1); } ++// CHECK-LABEL: @xbnz_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lasx.xbnz.d(<4 x i64> _1:%.*) ++// CHECK-NEXT: ret i32 TMP0 ++// ++int xbnz_d(v4u64 _1) { return __builtin_lasx_xbnz_d(_1); } ++// CHECK-LABEL: @xbnz_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lasx.xbnz.h(<16 x i16> _1:%.*) ++// CHECK-NEXT: ret i32 TMP0 ++// ++int xbnz_h(v16u16 _1) { return __builtin_lasx_xbnz_h(_1); } ++// CHECK-LABEL: @xbnz_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lasx.xbnz.v(<32 x i8> _1:%.*) ++// CHECK-NEXT: ret i32 TMP0 ++// ++int xbnz_v(v32u8 _1) { return __builtin_lasx_xbnz_v(_1); } ++// CHECK-LABEL: @xbnz_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lasx.xbnz.w(<8 x i32> _1:%.*) ++// CHECK-NEXT: ret i32 TMP0 ++// ++int xbnz_w(v8u32 _1) { return __builtin_lasx_xbnz_w(_1); } ++// CHECK-LABEL: @xbz_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lasx.xbz.b(<32 x i8> _1:%.*) ++// CHECK-NEXT: ret i32 TMP0 ++// ++int xbz_b(v32u8 _1) { return __builtin_lasx_xbz_b(_1); } ++// CHECK-LABEL: @xbz_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lasx.xbz.d(<4 x i64> _1:%.*) ++// CHECK-NEXT: ret i32 TMP0 ++// ++int xbz_d(v4u64 _1) { return __builtin_lasx_xbz_d(_1); } ++// CHECK-LABEL: @xbz_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lasx.xbz.h(<16 x i16> _1:%.*) ++// CHECK-NEXT: ret i32 TMP0 ++// ++int xbz_h(v16u16 _1) { return __builtin_lasx_xbz_h(_1); } ++// CHECK-LABEL: @xbz_v( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call i32 
@llvm.loongarch.lasx.xbz.v(<32 x i8> _1:%.*) ++// CHECK-NEXT: ret i32 TMP0 ++// ++int xbz_v(v32u8 _1) { return __builtin_lasx_xbz_v(_1); } ++// CHECK-LABEL: @xbz_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lasx.xbz.w(<8 x i32> _1:%.*) ++// CHECK-NEXT: ret i32 TMP0 ++// ++int xbz_w(v8u32 _1) { return __builtin_lasx_xbz_w(_1); } ++// CHECK-LABEL: @xvfcmp_caf_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.caf.d(<4 x double> _1:%.*, <4 x double> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvfcmp_caf_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_caf_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_caf_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.caf.s(<8 x float> _1:%.*, <8 x float> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvfcmp_caf_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_caf_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_ceq_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.ceq.d(<4 x double> _1:%.*, <4 x double> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvfcmp_ceq_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_ceq_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_ceq_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.ceq.s(<8 x float> _1:%.*, <8 x float> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvfcmp_ceq_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_ceq_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cle_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cle.d(<4 x double> _1:%.*, <4 x double> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvfcmp_cle_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cle_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cle_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cle.s(<8 x float> _1:%.*, <8 x float> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvfcmp_cle_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cle_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_clt_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.clt.d(<4 x double> _1:%.*, <4 x double> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvfcmp_clt_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_clt_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_clt_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.clt.s(<8 x float> _1:%.*, <8 x float> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvfcmp_clt_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_clt_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cne_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cne.d(<4 x double> _1:%.*, <4 x double> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvfcmp_cne_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cne_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cne_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cne.s(<8 x float> _1:%.*, <8 x float> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvfcmp_cne_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cne_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cor_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = 
tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cor.d(<4 x double> _1:%.*, <4 x double> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvfcmp_cor_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cor_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cor_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cor.s(<8 x float> _1:%.*, <8 x float> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvfcmp_cor_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cor_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cueq_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cueq.d(<4 x double> _1:%.*, <4 x double> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvfcmp_cueq_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cueq_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cueq_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cueq.s(<8 x float> _1:%.*, <8 x float> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvfcmp_cueq_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cueq_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cule_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cule.d(<4 x double> _1:%.*, <4 x double> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvfcmp_cule_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cule_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cule_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cule.s(<8 x float> _1:%.*, <8 x float> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvfcmp_cule_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cule_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cult_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cult.d(<4 x double> _1:%.*, <4 x double> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvfcmp_cult_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cult_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cult_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cult.s(<8 x float> _1:%.*, <8 x float> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvfcmp_cult_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cult_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cun_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cun.d(<4 x double> _1:%.*, <4 x double> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvfcmp_cun_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cun_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cune_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cune.d(<4 x double> _1:%.*, <4 x double> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvfcmp_cune_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cune_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cune_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cune.s(<8 x float> _1:%.*, <8 x float> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvfcmp_cune_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cune_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_cun_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cun.s(<8 x float> _1:%.*, <8 x float> _2:%.*) ++// CHECK-NEXT: 
ret <8 x i32> TMP0 ++// ++v8i32 xvfcmp_cun_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cun_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_saf_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.saf.d(<4 x double> _1:%.*, <4 x double> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvfcmp_saf_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_saf_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_saf_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.saf.s(<8 x float> _1:%.*, <8 x float> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvfcmp_saf_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_saf_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_seq_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.seq.d(<4 x double> _1:%.*, <4 x double> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvfcmp_seq_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_seq_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_seq_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.seq.s(<8 x float> _1:%.*, <8 x float> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvfcmp_seq_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_seq_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sle_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sle.d(<4 x double> _1:%.*, <4 x double> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvfcmp_sle_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sle_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sle_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sle.s(<8 x float> _1:%.*, <8 x float> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvfcmp_sle_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sle_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_slt_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.slt.d(<4 x double> _1:%.*, <4 x double> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvfcmp_slt_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_slt_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_slt_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.slt.s(<8 x float> _1:%.*, <8 x float> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvfcmp_slt_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_slt_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sne_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sne.d(<4 x double> _1:%.*, <4 x double> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvfcmp_sne_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sne_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sne_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sne.s(<8 x float> _1:%.*, <8 x float> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvfcmp_sne_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sne_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sor_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sor.d(<4 x double> _1:%.*, <4 x double> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvfcmp_sor_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sor_d(_1, _2); } ++// CHECK-LABEL: 
@xvfcmp_sor_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sor.s(<8 x float> _1:%.*, <8 x float> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvfcmp_sor_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sor_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sueq_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sueq.d(<4 x double> _1:%.*, <4 x double> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvfcmp_sueq_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sueq_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sueq_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sueq.s(<8 x float> _1:%.*, <8 x float> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvfcmp_sueq_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sueq_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sule_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sule.d(<4 x double> _1:%.*, <4 x double> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvfcmp_sule_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sule_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sule_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sule.s(<8 x float> _1:%.*, <8 x float> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvfcmp_sule_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sule_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sult_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sult.d(<4 x double> _1:%.*, <4 x double> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvfcmp_sult_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sult_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sult_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sult.s(<8 x float> _1:%.*, <8 x float> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvfcmp_sult_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sult_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sun_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sun.d(<4 x double> _1:%.*, <4 x double> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvfcmp_sun_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sun_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sune_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sune.d(<4 x double> _1:%.*, <4 x double> _2:%.*) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvfcmp_sune_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sune_d(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sune_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sune.s(<8 x float> _1:%.*, <8 x float> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvfcmp_sune_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sune_s(_1, _2); } ++// CHECK-LABEL: @xvfcmp_sun_s( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sun.s(<8 x float> _1:%.*, <8 x float> _2:%.*) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvfcmp_sun_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sun_s(_1, _2); } ++// CHECK-LABEL: @xvpickve_d_f( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x double> 
@llvm.loongarch.lasx.xvpickve.d.f(<4 x double> _1:%.*, i32 1) ++// CHECK-NEXT: ret <4 x double> TMP0 ++// ++v4f64 xvpickve_d_f(v4f64 _1) { return __builtin_lasx_xvpickve_d_f(_1, 1); } ++// CHECK-LABEL: @xvpickve_w_f( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvpickve.w.f(<8 x float> _1:%.*, i32 1) ++// CHECK-NEXT: ret <8 x float> TMP0 ++// ++v8f32 xvpickve_w_f(v8f32 _1) { return __builtin_lasx_xvpickve_w_f(_1, 1); } ++// CHECK-LABEL: @xvrepli_b( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvrepli.b(i32 1) ++// CHECK-NEXT: ret <32 x i8> TMP0 ++// ++v32i8 xvrepli_b() { return __builtin_lasx_xvrepli_b(1); } ++// CHECK-LABEL: @xvrepli_d( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvrepli.d(i32 1) ++// CHECK-NEXT: ret <4 x i64> TMP0 ++// ++v4i64 xvrepli_d() { return __builtin_lasx_xvrepli_d(1); } ++// CHECK-LABEL: @xvrepli_h( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvrepli.h(i32 1) ++// CHECK-NEXT: ret <16 x i16> TMP0 ++// ++v16i16 xvrepli_h() { return __builtin_lasx_xvrepli_h(1); } ++// CHECK-LABEL: @xvrepli_w( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvrepli.w(i32 1) ++// CHECK-NEXT: ret <8 x i32> TMP0 ++// ++v8i32 xvrepli_w() { return __builtin_lasx_xvrepli_w(1); } +-- +2.20.1 + + +From 5e7ffbc865541b46b1274418a7b2900295ebb5af Mon Sep 17 00:00:00 2001 +From: yjijd <licongtian@loongson.cn> +Date: Tue, 2 Jan 2024 11:46:00 +0800 +Subject: [PATCH 8/8] [Clang][LoongArch] Do not pass vector arguments via + vector registers (#74990) + +psABI v2.30 clarifies that vector arguments are passed according to the +base ABI by default. + +(cherry picked from commit 0e01c72c5645259d9a08a1a7ed39cb5cc41ce311) +--- + clang/lib/CodeGen/Targets/LoongArch.cpp | 7 - + .../CodeGen/LoongArch/lasx/builtin-alias.c | 4876 ++++++++++++----- + clang/test/CodeGen/LoongArch/lasx/builtin.c | 4874 +++++++++++----- + .../CodeGen/LoongArch/lsx/builtin-alias.c | 4746 +++++++++++----- + clang/test/CodeGen/LoongArch/lsx/builtin.c | 4746 +++++++++++----- + 5 files changed, 13485 insertions(+), 5764 deletions(-) + +diff --git a/clang/lib/CodeGen/Targets/LoongArch.cpp b/clang/lib/CodeGen/Targets/LoongArch.cpp +index 26c68c3583b2..7483bf6d6d1e 100644 +--- a/clang/lib/CodeGen/Targets/LoongArch.cpp ++++ b/clang/lib/CodeGen/Targets/LoongArch.cpp +@@ -321,13 +321,6 @@ ABIArgInfo LoongArchABIInfo::classifyArgumentType(QualType Ty, bool IsFixed, + return ABIArgInfo::getDirect(); + } + +- // Pass 128-bit/256-bit vector values via vector registers directly. +- if (Ty->isVectorType() && (((getContext().getTypeSize(Ty) == 128) && +- (getTarget().hasFeature("lsx"))) || +- ((getContext().getTypeSize(Ty) == 256) && +- getTarget().hasFeature("lasx")))) +- return ABIArgInfo::getDirect(); +- + // Complex types for the *f or *d ABI must be passed directly rather than + // using CoerceAndExpand.
+ if (IsFixed && Ty->isComplexType() && FRLen && FARsLeft >= 2) { +diff --git a/clang/test/CodeGen/LoongArch/lasx/builtin-alias.c b/clang/test/CodeGen/LoongArch/lasx/builtin-alias.c +index 09b2d5fcacf5..9a8ce224bcfd 100644 +--- a/clang/test/CodeGen/LoongArch/lasx/builtin-alias.c ++++ b/clang/test/CodeGen/LoongArch/lasx/builtin-alias.c +@@ -5,4426 +5,6382 @@ + + // CHECK-LABEL: @xvsll_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsll.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _124:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2:!0-9+ ++// CHECK-NEXT: _235:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsll.b(<32 x i8> _124, <32 x i8> _235) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvsll_b(v32i8 _1, v32i8 _2) { return __lasx_xvsll_b(_1, _2); } + // CHECK-LABEL: @xvsll_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsll.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _124:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsll.h(<16 x i16> _124, <16 x i16> _235) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvsll_h(v16i16 _1, v16i16 _2) { return __lasx_xvsll_h(_1, _2); } + // CHECK-LABEL: @xvsll_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsll.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _124:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsll.w(<8 x i32> _124, <8 x i32> _235) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvsll_w(v8i32 _1, v8i32 _2) { return __lasx_xvsll_w(_1, _2); } + // CHECK-LABEL: @xvsll_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsll.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsll.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvsll_d(v4i64 _1, v4i64 _2) { return __lasx_xvsll_d(_1, _2); } + // CHECK-LABEL: @xvslli_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvslli.b(<32 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvslli.b(<32 x i8> _1, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvslli_b(v32i8 _1) { return __lasx_xvslli_b(_1, 1); } + // CHECK-LABEL: 
@xvslli_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvslli.h(<16 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvslli.h(<16 x i16> _1, i32 1) ++// CHECK-NEXT: store <16 x i16> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvslli_h(v16i16 _1) { return __lasx_xvslli_h(_1, 1); } + // CHECK-LABEL: @xvslli_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvslli.w(<8 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvslli.w(<8 x i32> _1, i32 1) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvslli_w(v8i32 _1) { return __lasx_xvslli_w(_1, 1); } + // CHECK-LABEL: @xvslli_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvslli.d(<4 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvslli.d(<4 x i64> _1, i32 1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvslli_d(v4i64 _1) { return __lasx_xvslli_d(_1, 1); } + // CHECK-LABEL: @xvsra_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsra.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _124:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsra.b(<32 x i8> _124, <32 x i8> _235) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvsra_b(v32i8 _1, v32i8 _2) { return __lasx_xvsra_b(_1, _2); } + // CHECK-LABEL: @xvsra_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsra.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _124:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsra.h(<16 x i16> _124, <16 x i16> _235) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvsra_h(v16i16 _1, v16i16 _2) { return __lasx_xvsra_h(_1, _2); } + // CHECK-LABEL: @xvsra_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsra.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _124:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsra.w(<8 x i32> _124, <8 x i32> _235) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvsra_w(v8i32 _1, v8i32 
_2) { return __lasx_xvsra_w(_1, _2); } + // CHECK-LABEL: @xvsra_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsra.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsra.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvsra_d(v4i64 _1, v4i64 _2) { return __lasx_xvsra_d(_1, _2); } + // CHECK-LABEL: @xvsrai_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsrai.b(<32 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsrai.b(<32 x i8> _1, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvsrai_b(v32i8 _1) { return __lasx_xvsrai_b(_1, 1); } + // CHECK-LABEL: @xvsrai_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsrai.h(<16 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsrai.h(<16 x i16> _1, i32 1) ++// CHECK-NEXT: store <16 x i16> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvsrai_h(v16i16 _1) { return __lasx_xvsrai_h(_1, 1); } + // CHECK-LABEL: @xvsrai_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsrai.w(<8 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsrai.w(<8 x i32> _1, i32 1) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvsrai_w(v8i32 _1) { return __lasx_xvsrai_w(_1, 1); } + // CHECK-LABEL: @xvsrai_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsrai.d(<4 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsrai.d(<4 x i64> _1, i32 1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvsrai_d(v4i64 _1) { return __lasx_xvsrai_d(_1, 1); } + // CHECK-LABEL: @xvsrar_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsrar.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _124:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsrar.b(<32 x i8> _124, <32 x i8> _235) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvsrar_b(v32i8 _1, v32i8 _2) { return __lasx_xvsrar_b(_1, _2); } + // CHECK-LABEL: @xvsrar_h( + 
// CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsrar.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _124:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsrar.h(<16 x i16> _124, <16 x i16> _235) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvsrar_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrar_h(_1, _2); } + // CHECK-LABEL: @xvsrar_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsrar.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _124:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsrar.w(<8 x i32> _124, <8 x i32> _235) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvsrar_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrar_w(_1, _2); } + // CHECK-LABEL: @xvsrar_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsrar.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsrar.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvsrar_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrar_d(_1, _2); } + // CHECK-LABEL: @xvsrari_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsrari.b(<32 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsrari.b(<32 x i8> _1, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvsrari_b(v32i8 _1) { return __lasx_xvsrari_b(_1, 1); } + // CHECK-LABEL: @xvsrari_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsrari.h(<16 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsrari.h(<16 x i16> _1, i32 1) ++// CHECK-NEXT: store <16 x i16> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvsrari_h(v16i16 _1) { return __lasx_xvsrari_h(_1, 1); } + // CHECK-LABEL: @xvsrari_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsrari.w(<8 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsrari.w(<8 x i32> _1, i32 1) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 
xvsrari_w(v8i32 _1) { return __lasx_xvsrari_w(_1, 1); } + // CHECK-LABEL: @xvsrari_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsrari.d(<4 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsrari.d(<4 x i64> _1, i32 1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvsrari_d(v4i64 _1) { return __lasx_xvsrari_d(_1, 1); } + // CHECK-LABEL: @xvsrl_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsrl.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _124:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsrl.b(<32 x i8> _124, <32 x i8> _235) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvsrl_b(v32i8 _1, v32i8 _2) { return __lasx_xvsrl_b(_1, _2); } + // CHECK-LABEL: @xvsrl_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsrl.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _124:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsrl.h(<16 x i16> _124, <16 x i16> _235) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvsrl_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrl_h(_1, _2); } + // CHECK-LABEL: @xvsrl_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsrl.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _124:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsrl.w(<8 x i32> _124, <8 x i32> _235) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvsrl_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrl_w(_1, _2); } + // CHECK-LABEL: @xvsrl_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsrl.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsrl.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvsrl_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrl_d(_1, _2); } + // CHECK-LABEL: @xvsrli_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsrli.b(<32 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <32 x i8> 
@llvm.loongarch.lasx.xvsrli.b(<32 x i8> _1, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvsrli_b(v32i8 _1) { return __lasx_xvsrli_b(_1, 1); } + // CHECK-LABEL: @xvsrli_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsrli.h(<16 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsrli.h(<16 x i16> _1, i32 1) ++// CHECK-NEXT: store <16 x i16> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvsrli_h(v16i16 _1) { return __lasx_xvsrli_h(_1, 1); } + // CHECK-LABEL: @xvsrli_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsrli.w(<8 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsrli.w(<8 x i32> _1, i32 1) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvsrli_w(v8i32 _1) { return __lasx_xvsrli_w(_1, 1); } + // CHECK-LABEL: @xvsrli_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsrli.d(<4 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsrli.d(<4 x i64> _1, i32 1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvsrli_d(v4i64 _1) { return __lasx_xvsrli_d(_1, 1); } + // CHECK-LABEL: @xvsrlr_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlr.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _124:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlr.b(<32 x i8> _124, <32 x i8> _235) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvsrlr_b(v32i8 _1, v32i8 _2) { return __lasx_xvsrlr_b(_1, _2); } + // CHECK-LABEL: @xvsrlr_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlr.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _124:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlr.h(<16 x i16> _124, <16 x i16> _235) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvsrlr_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrlr_h(_1, _2); } + // CHECK-LABEL: @xvsrlr_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlr.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _124:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 
++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlr.w(<8 x i32> _124, <8 x i32> _235) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvsrlr_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrlr_w(_1, _2); } + // CHECK-LABEL: @xvsrlr_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlr.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlr.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvsrlr_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrlr_d(_1, _2); } + // CHECK-LABEL: @xvsrlri_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlri.b(<32 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlri.b(<32 x i8> _1, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvsrlri_b(v32i8 _1) { return __lasx_xvsrlri_b(_1, 1); } + // CHECK-LABEL: @xvsrlri_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlri.h(<16 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlri.h(<16 x i16> _1, i32 1) ++// CHECK-NEXT: store <16 x i16> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvsrlri_h(v16i16 _1) { return __lasx_xvsrlri_h(_1, 1); } + // CHECK-LABEL: @xvsrlri_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlri.w(<8 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlri.w(<8 x i32> _1, i32 1) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvsrlri_w(v8i32 _1) { return __lasx_xvsrlri_w(_1, 1); } + // CHECK-LABEL: @xvsrlri_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlri.d(<4 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlri.d(<4 x i64> _1, i32 1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvsrlri_d(v4i64 _1) { return __lasx_xvsrlri_d(_1, 1); } + // CHECK-LABEL: @xvbitclr_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvbitclr.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _124:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 
x i8> @llvm.loongarch.lasx.xvbitclr.b(<32 x i8> _124, <32 x i8> _235) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32u8 xvbitclr_b(v32u8 _1, v32u8 _2) { return __lasx_xvbitclr_b(_1, _2); } + // CHECK-LABEL: @xvbitclr_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvbitclr.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _124:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvbitclr.h(<16 x i16> _124, <16 x i16> _235) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16u16 xvbitclr_h(v16u16 _1, v16u16 _2) { return __lasx_xvbitclr_h(_1, _2); } + // CHECK-LABEL: @xvbitclr_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvbitclr.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _124:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvbitclr.w(<8 x i32> _124, <8 x i32> _235) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8u32 xvbitclr_w(v8u32 _1, v8u32 _2) { return __lasx_xvbitclr_w(_1, _2); } + // CHECK-LABEL: @xvbitclr_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvbitclr.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvbitclr.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4u64 xvbitclr_d(v4u64 _1, v4u64 _2) { return __lasx_xvbitclr_d(_1, _2); } + // CHECK-LABEL: @xvbitclri_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvbitclri.b(<32 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvbitclri.b(<32 x i8> _1, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32u8 xvbitclri_b(v32u8 _1) { return __lasx_xvbitclri_b(_1, 1); } + // CHECK-LABEL: @xvbitclri_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvbitclri.h(<16 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvbitclri.h(<16 x i16> _1, i32 1) ++// CHECK-NEXT: store <16 x i16> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16u16 xvbitclri_h(v16u16 _1) { return __lasx_xvbitclri_h(_1, 1); } + // CHECK-LABEL: @xvbitclri_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvbitclri.w(<8 x i32> _1:%.*, i32 1) 
+-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvbitclri.w(<8 x i32> _1, i32 1) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8u32 xvbitclri_w(v8u32 _1) { return __lasx_xvbitclri_w(_1, 1); } + // CHECK-LABEL: @xvbitclri_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvbitclri.d(<4 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvbitclri.d(<4 x i64> _1, i32 1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4u64 xvbitclri_d(v4u64 _1) { return __lasx_xvbitclri_d(_1, 1); } + // CHECK-LABEL: @xvbitset_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvbitset.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _124:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvbitset.b(<32 x i8> _124, <32 x i8> _235) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32u8 xvbitset_b(v32u8 _1, v32u8 _2) { return __lasx_xvbitset_b(_1, _2); } + // CHECK-LABEL: @xvbitset_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvbitset.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _124:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvbitset.h(<16 x i16> _124, <16 x i16> _235) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16u16 xvbitset_h(v16u16 _1, v16u16 _2) { return __lasx_xvbitset_h(_1, _2); } + // CHECK-LABEL: @xvbitset_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvbitset.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _124:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvbitset.w(<8 x i32> _124, <8 x i32> _235) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8u32 xvbitset_w(v8u32 _1, v8u32 _2) { return __lasx_xvbitset_w(_1, _2); } + // CHECK-LABEL: @xvbitset_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvbitset.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvbitset.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // 
+ v4u64 xvbitset_d(v4u64 _1, v4u64 _2) { return __lasx_xvbitset_d(_1, _2); } + // CHECK-LABEL: @xvbitseti_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvbitseti.b(<32 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvbitseti.b(<32 x i8> _1, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32u8 xvbitseti_b(v32u8 _1) { return __lasx_xvbitseti_b(_1, 1); } + // CHECK-LABEL: @xvbitseti_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvbitseti.h(<16 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvbitseti.h(<16 x i16> _1, i32 1) ++// CHECK-NEXT: store <16 x i16> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16u16 xvbitseti_h(v16u16 _1) { return __lasx_xvbitseti_h(_1, 1); } + // CHECK-LABEL: @xvbitseti_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvbitseti.w(<8 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvbitseti.w(<8 x i32> _1, i32 1) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8u32 xvbitseti_w(v8u32 _1) { return __lasx_xvbitseti_w(_1, 1); } + // CHECK-LABEL: @xvbitseti_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvbitseti.d(<4 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvbitseti.d(<4 x i64> _1, i32 1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4u64 xvbitseti_d(v4u64 _1) { return __lasx_xvbitseti_d(_1, 1); } + // CHECK-LABEL: @xvbitrev_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvbitrev.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _124:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvbitrev.b(<32 x i8> _124, <32 x i8> _235) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32u8 xvbitrev_b(v32u8 _1, v32u8 _2) { return __lasx_xvbitrev_b(_1, _2); } + // CHECK-LABEL: @xvbitrev_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvbitrev.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _124:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvbitrev.h(<16 x i16> _124, <16 x i16> _235) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa 
TBAA2 ++// CHECK-NEXT: ret void + // + v16u16 xvbitrev_h(v16u16 _1, v16u16 _2) { return __lasx_xvbitrev_h(_1, _2); } + // CHECK-LABEL: @xvbitrev_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvbitrev.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _124:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvbitrev.w(<8 x i32> _124, <8 x i32> _235) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8u32 xvbitrev_w(v8u32 _1, v8u32 _2) { return __lasx_xvbitrev_w(_1, _2); } + // CHECK-LABEL: @xvbitrev_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvbitrev.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvbitrev.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4u64 xvbitrev_d(v4u64 _1, v4u64 _2) { return __lasx_xvbitrev_d(_1, _2); } + // CHECK-LABEL: @xvbitrevi_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b(<32 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b(<32 x i8> _1, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32u8 xvbitrevi_b(v32u8 _1) { return __lasx_xvbitrevi_b(_1, 1); } + // CHECK-LABEL: @xvbitrevi_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h(<16 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h(<16 x i16> _1, i32 1) ++// CHECK-NEXT: store <16 x i16> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16u16 xvbitrevi_h(v16u16 _1) { return __lasx_xvbitrevi_h(_1, 1); } + // CHECK-LABEL: @xvbitrevi_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w(<8 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w(<8 x i32> _1, i32 1) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8u32 xvbitrevi_w(v8u32 _1) { return __lasx_xvbitrevi_w(_1, 1); } + // CHECK-LABEL: @xvbitrevi_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d(<4 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d(<4 x i64> _1, i32 1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr 
AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4u64 xvbitrevi_d(v4u64 _1) { return __lasx_xvbitrevi_d(_1, 1); } + // CHECK-LABEL: @xvadd_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvadd.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _124:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvadd.b(<32 x i8> _124, <32 x i8> _235) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvadd_b(v32i8 _1, v32i8 _2) { return __lasx_xvadd_b(_1, _2); } + // CHECK-LABEL: @xvadd_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvadd.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _124:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvadd.h(<16 x i16> _124, <16 x i16> _235) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvadd_h(v16i16 _1, v16i16 _2) { return __lasx_xvadd_h(_1, _2); } + // CHECK-LABEL: @xvadd_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvadd.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _124:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvadd.w(<8 x i32> _124, <8 x i32> _235) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvadd_w(v8i32 _1, v8i32 _2) { return __lasx_xvadd_w(_1, _2); } + // CHECK-LABEL: @xvadd_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvadd.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvadd.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvadd_d(v4i64 _1, v4i64 _2) { return __lasx_xvadd_d(_1, _2); } + // CHECK-LABEL: @xvaddi_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvaddi.bu(<32 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvaddi.bu(<32 x i8> _1, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvaddi_bu(v32i8 _1) { return __lasx_xvaddi_bu(_1, 1); } + // CHECK-LABEL: @xvaddi_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvaddi.hu(<16 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr 
TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvaddi.hu(<16 x i16> _1, i32 1) ++// CHECK-NEXT: store <16 x i16> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvaddi_hu(v16i16 _1) { return __lasx_xvaddi_hu(_1, 1); } + // CHECK-LABEL: @xvaddi_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvaddi.wu(<8 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvaddi.wu(<8 x i32> _1, i32 1) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvaddi_wu(v8i32 _1) { return __lasx_xvaddi_wu(_1, 1); } + // CHECK-LABEL: @xvaddi_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvaddi.du(<4 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvaddi.du(<4 x i64> _1, i32 1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvaddi_du(v4i64 _1) { return __lasx_xvaddi_du(_1, 1); } + // CHECK-LABEL: @xvsub_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsub.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _124:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsub.b(<32 x i8> _124, <32 x i8> _235) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvsub_b(v32i8 _1, v32i8 _2) { return __lasx_xvsub_b(_1, _2); } + // CHECK-LABEL: @xvsub_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsub.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _124:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsub.h(<16 x i16> _124, <16 x i16> _235) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvsub_h(v16i16 _1, v16i16 _2) { return __lasx_xvsub_h(_1, _2); } + // CHECK-LABEL: @xvsub_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsub.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _124:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsub.w(<8 x i32> _124, <8 x i32> _235) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvsub_w(v8i32 _1, v8i32 _2) { return __lasx_xvsub_w(_1, _2); } + // CHECK-LABEL: @xvsub_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsub.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// 
CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsub.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvsub_d(v4i64 _1, v4i64 _2) { return __lasx_xvsub_d(_1, _2); } + // CHECK-LABEL: @xvsubi_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsubi.bu(<32 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsubi.bu(<32 x i8> _1, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvsubi_bu(v32i8 _1) { return __lasx_xvsubi_bu(_1, 1); } + // CHECK-LABEL: @xvsubi_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsubi.hu(<16 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsubi.hu(<16 x i16> _1, i32 1) ++// CHECK-NEXT: store <16 x i16> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvsubi_hu(v16i16 _1) { return __lasx_xvsubi_hu(_1, 1); } + // CHECK-LABEL: @xvsubi_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsubi.wu(<8 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsubi.wu(<8 x i32> _1, i32 1) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvsubi_wu(v8i32 _1) { return __lasx_xvsubi_wu(_1, 1); } + // CHECK-LABEL: @xvsubi_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsubi.du(<4 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsubi.du(<4 x i64> _1, i32 1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvsubi_du(v4i64 _1) { return __lasx_xvsubi_du(_1, 1); } + // CHECK-LABEL: @xvmax_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmax.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _124:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmax.b(<32 x i8> _124, <32 x i8> _235) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvmax_b(v32i8 _1, v32i8 _2) { return __lasx_xvmax_b(_1, _2); } + // CHECK-LABEL: @xvmax_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmax.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: 
_124:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmax.h(<16 x i16> _124, <16 x i16> _235) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvmax_h(v16i16 _1, v16i16 _2) { return __lasx_xvmax_h(_1, _2); } + // CHECK-LABEL: @xvmax_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmax.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _124:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmax.w(<8 x i32> _124, <8 x i32> _235) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvmax_w(v8i32 _1, v8i32 _2) { return __lasx_xvmax_w(_1, _2); } + // CHECK-LABEL: @xvmax_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmax.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmax.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvmax_d(v4i64 _1, v4i64 _2) { return __lasx_xvmax_d(_1, _2); } + // CHECK-LABEL: @xvmaxi_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmaxi.b(<32 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmaxi.b(<32 x i8> _1, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvmaxi_b(v32i8 _1) { return __lasx_xvmaxi_b(_1, 1); } + // CHECK-LABEL: @xvmaxi_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmaxi.h(<16 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmaxi.h(<16 x i16> _1, i32 1) ++// CHECK-NEXT: store <16 x i16> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvmaxi_h(v16i16 _1) { return __lasx_xvmaxi_h(_1, 1); } + // CHECK-LABEL: @xvmaxi_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmaxi.w(<8 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmaxi.w(<8 x i32> _1, i32 1) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvmaxi_w(v8i32 _1) { return __lasx_xvmaxi_w(_1, 1); } + // CHECK-LABEL: @xvmaxi_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmaxi.d(<4 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: 
ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmaxi.d(<4 x i64> _1, i32 1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvmaxi_d(v4i64 _1) { return __lasx_xvmaxi_d(_1, 1); } + // CHECK-LABEL: @xvmax_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmax.bu(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _124:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmax.bu(<32 x i8> _124, <32 x i8> _235) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32u8 xvmax_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmax_bu(_1, _2); } + // CHECK-LABEL: @xvmax_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmax.hu(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _124:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmax.hu(<16 x i16> _124, <16 x i16> _235) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16u16 xvmax_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmax_hu(_1, _2); } + // CHECK-LABEL: @xvmax_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmax.wu(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _124:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmax.wu(<8 x i32> _124, <8 x i32> _235) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8u32 xvmax_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmax_wu(_1, _2); } + // CHECK-LABEL: @xvmax_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmax.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmax.du(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4u64 xvmax_du(v4u64 _1, v4u64 _2) { return __lasx_xvmax_du(_1, _2); } + // CHECK-LABEL: @xvmaxi_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu(<32 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu(<32 x i8> _1, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32u8 xvmaxi_bu(v32u8 _1) { return __lasx_xvmaxi_bu(_1, 1); } 
+ // CHECK-LABEL: @xvmaxi_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu(<16 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu(<16 x i16> _1, i32 1) ++// CHECK-NEXT: store <16 x i16> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16u16 xvmaxi_hu(v16u16 _1) { return __lasx_xvmaxi_hu(_1, 1); } + // CHECK-LABEL: @xvmaxi_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu(<8 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu(<8 x i32> _1, i32 1) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8u32 xvmaxi_wu(v8u32 _1) { return __lasx_xvmaxi_wu(_1, 1); } + // CHECK-LABEL: @xvmaxi_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmaxi.du(<4 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmaxi.du(<4 x i64> _1, i32 1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4u64 xvmaxi_du(v4u64 _1) { return __lasx_xvmaxi_du(_1, 1); } + // CHECK-LABEL: @xvmin_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmin.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _124:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmin.b(<32 x i8> _124, <32 x i8> _235) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvmin_b(v32i8 _1, v32i8 _2) { return __lasx_xvmin_b(_1, _2); } + // CHECK-LABEL: @xvmin_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmin.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _124:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmin.h(<16 x i16> _124, <16 x i16> _235) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvmin_h(v16i16 _1, v16i16 _2) { return __lasx_xvmin_h(_1, _2); } + // CHECK-LABEL: @xvmin_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmin.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _124:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmin.w(<8 x i32> _124, <8 x i32> _235) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // 
+ v8i32 xvmin_w(v8i32 _1, v8i32 _2) { return __lasx_xvmin_w(_1, _2); } + // CHECK-LABEL: @xvmin_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmin.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmin.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvmin_d(v4i64 _1, v4i64 _2) { return __lasx_xvmin_d(_1, _2); } + // CHECK-LABEL: @xvmini_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmini.b(<32 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmini.b(<32 x i8> _1, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvmini_b(v32i8 _1) { return __lasx_xvmini_b(_1, 1); } + // CHECK-LABEL: @xvmini_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmini.h(<16 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmini.h(<16 x i16> _1, i32 1) ++// CHECK-NEXT: store <16 x i16> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvmini_h(v16i16 _1) { return __lasx_xvmini_h(_1, 1); } + // CHECK-LABEL: @xvmini_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmini.w(<8 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmini.w(<8 x i32> _1, i32 1) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvmini_w(v8i32 _1) { return __lasx_xvmini_w(_1, 1); } + // CHECK-LABEL: @xvmini_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmini.d(<4 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmini.d(<4 x i64> _1, i32 1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvmini_d(v4i64 _1) { return __lasx_xvmini_d(_1, 1); } + // CHECK-LABEL: @xvmin_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmin.bu(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _124:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmin.bu(<32 x i8> _124, <32 x i8> _235) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32u8 xvmin_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmin_bu(_1, _2); } 
+ // CHECK-LABEL: @xvmin_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmin.hu(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _124:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmin.hu(<16 x i16> _124, <16 x i16> _235) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16u16 xvmin_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmin_hu(_1, _2); } + // CHECK-LABEL: @xvmin_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmin.wu(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _124:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmin.wu(<8 x i32> _124, <8 x i32> _235) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8u32 xvmin_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmin_wu(_1, _2); } + // CHECK-LABEL: @xvmin_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmin.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmin.du(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4u64 xvmin_du(v4u64 _1, v4u64 _2) { return __lasx_xvmin_du(_1, _2); } + // CHECK-LABEL: @xvmini_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmini.bu(<32 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmini.bu(<32 x i8> _1, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32u8 xvmini_bu(v32u8 _1) { return __lasx_xvmini_bu(_1, 1); } + // CHECK-LABEL: @xvmini_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmini.hu(<16 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmini.hu(<16 x i16> _1, i32 1) ++// CHECK-NEXT: store <16 x i16> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16u16 xvmini_hu(v16u16 _1) { return __lasx_xvmini_hu(_1, 1); } + // CHECK-LABEL: @xvmini_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmini.wu(<8 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmini.wu(<8 x i32> _1, i32 1) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: 
ret void + // + v8u32 xvmini_wu(v8u32 _1) { return __lasx_xvmini_wu(_1, 1); } + // CHECK-LABEL: @xvmini_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmini.du(<4 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmini.du(<4 x i64> _1, i32 1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4u64 xvmini_du(v4u64 _1) { return __lasx_xvmini_du(_1, 1); } + // CHECK-LABEL: @xvseq_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvseq.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _124:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvseq.b(<32 x i8> _124, <32 x i8> _235) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvseq_b(v32i8 _1, v32i8 _2) { return __lasx_xvseq_b(_1, _2); } + // CHECK-LABEL: @xvseq_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvseq.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _124:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvseq.h(<16 x i16> _124, <16 x i16> _235) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvseq_h(v16i16 _1, v16i16 _2) { return __lasx_xvseq_h(_1, _2); } + // CHECK-LABEL: @xvseq_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvseq.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _124:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvseq.w(<8 x i32> _124, <8 x i32> _235) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvseq_w(v8i32 _1, v8i32 _2) { return __lasx_xvseq_w(_1, _2); } + // CHECK-LABEL: @xvseq_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvseq.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvseq.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvseq_d(v4i64 _1, v4i64 _2) { return __lasx_xvseq_d(_1, _2); } + // CHECK-LABEL: @xvseqi_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvseqi.b(<32 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <32 
x i8> @llvm.loongarch.lasx.xvseqi.b(<32 x i8> _1, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvseqi_b(v32i8 _1) { return __lasx_xvseqi_b(_1, 1); } + // CHECK-LABEL: @xvseqi_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvseqi.h(<16 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvseqi.h(<16 x i16> _1, i32 1) ++// CHECK-NEXT: store <16 x i16> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvseqi_h(v16i16 _1) { return __lasx_xvseqi_h(_1, 1); } + // CHECK-LABEL: @xvseqi_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvseqi.w(<8 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvseqi.w(<8 x i32> _1, i32 1) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvseqi_w(v8i32 _1) { return __lasx_xvseqi_w(_1, 1); } + // CHECK-LABEL: @xvseqi_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvseqi.d(<4 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvseqi.d(<4 x i64> _1, i32 1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvseqi_d(v4i64 _1) { return __lasx_xvseqi_d(_1, 1); } + // CHECK-LABEL: @xvslt_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvslt.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _124:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvslt.b(<32 x i8> _124, <32 x i8> _235) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvslt_b(v32i8 _1, v32i8 _2) { return __lasx_xvslt_b(_1, _2); } + // CHECK-LABEL: @xvslt_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvslt.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _124:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvslt.h(<16 x i16> _124, <16 x i16> _235) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvslt_h(v16i16 _1, v16i16 _2) { return __lasx_xvslt_h(_1, _2); } + // CHECK-LABEL: @xvslt_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvslt.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _124:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// 
CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvslt.w(<8 x i32> _124, <8 x i32> _235) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvslt_w(v8i32 _1, v8i32 _2) { return __lasx_xvslt_w(_1, _2); } + // CHECK-LABEL: @xvslt_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvslt.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvslt.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvslt_d(v4i64 _1, v4i64 _2) { return __lasx_xvslt_d(_1, _2); } + // CHECK-LABEL: @xvslti_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvslti.b(<32 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvslti.b(<32 x i8> _1, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvslti_b(v32i8 _1) { return __lasx_xvslti_b(_1, 1); } + // CHECK-LABEL: @xvslti_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvslti.h(<16 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvslti.h(<16 x i16> _1, i32 1) ++// CHECK-NEXT: store <16 x i16> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvslti_h(v16i16 _1) { return __lasx_xvslti_h(_1, 1); } + // CHECK-LABEL: @xvslti_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvslti.w(<8 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvslti.w(<8 x i32> _1, i32 1) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvslti_w(v8i32 _1) { return __lasx_xvslti_w(_1, 1); } + // CHECK-LABEL: @xvslti_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvslti.d(<4 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvslti.d(<4 x i64> _1, i32 1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvslti_d(v4i64 _1) { return __lasx_xvslti_d(_1, 1); } + // CHECK-LABEL: @xvslt_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvslt.bu(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _124:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> 
@llvm.loongarch.lasx.xvslt.bu(<32 x i8> _124, <32 x i8> _235) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvslt_bu(v32u8 _1, v32u8 _2) { return __lasx_xvslt_bu(_1, _2); } + // CHECK-LABEL: @xvslt_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvslt.hu(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _124:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvslt.hu(<16 x i16> _124, <16 x i16> _235) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvslt_hu(v16u16 _1, v16u16 _2) { return __lasx_xvslt_hu(_1, _2); } + // CHECK-LABEL: @xvslt_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvslt.wu(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _124:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvslt.wu(<8 x i32> _124, <8 x i32> _235) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvslt_wu(v8u32 _1, v8u32 _2) { return __lasx_xvslt_wu(_1, _2); } + // CHECK-LABEL: @xvslt_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvslt.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvslt.du(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvslt_du(v4u64 _1, v4u64 _2) { return __lasx_xvslt_du(_1, _2); } + // CHECK-LABEL: @xvslti_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvslti.bu(<32 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvslti.bu(<32 x i8> _1, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvslti_bu(v32u8 _1) { return __lasx_xvslti_bu(_1, 1); } + // CHECK-LABEL: @xvslti_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvslti.hu(<16 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvslti.hu(<16 x i16> _1, i32 1) ++// CHECK-NEXT: store <16 x i16> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvslti_hu(v16u16 _1) { return __lasx_xvslti_hu(_1, 1); } + // CHECK-LABEL: @xvslti_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvslti.wu(<8 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = 
load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvslti.wu(<8 x i32> _1, i32 1) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvslti_wu(v8u32 _1) { return __lasx_xvslti_wu(_1, 1); } + // CHECK-LABEL: @xvslti_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvslti.du(<4 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvslti.du(<4 x i64> _1, i32 1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvslti_du(v4u64 _1) { return __lasx_xvslti_du(_1, 1); } + // CHECK-LABEL: @xvsle_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsle.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _124:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsle.b(<32 x i8> _124, <32 x i8> _235) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvsle_b(v32i8 _1, v32i8 _2) { return __lasx_xvsle_b(_1, _2); } + // CHECK-LABEL: @xvsle_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsle.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _124:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsle.h(<16 x i16> _124, <16 x i16> _235) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvsle_h(v16i16 _1, v16i16 _2) { return __lasx_xvsle_h(_1, _2); } + // CHECK-LABEL: @xvsle_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsle.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _124:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsle.w(<8 x i32> _124, <8 x i32> _235) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvsle_w(v8i32 _1, v8i32 _2) { return __lasx_xvsle_w(_1, _2); } + // CHECK-LABEL: @xvsle_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsle.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsle.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvsle_d(v4i64 _1, v4i64 _2) { return __lasx_xvsle_d(_1, _2); } + // CHECK-LABEL: @xvslei_b( + // CHECK-NEXT: entry: +-// 
CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvslei.b(<32 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvslei.b(<32 x i8> _1, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvslei_b(v32i8 _1) { return __lasx_xvslei_b(_1, 1); } + // CHECK-LABEL: @xvslei_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvslei.h(<16 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvslei.h(<16 x i16> _1, i32 1) ++// CHECK-NEXT: store <16 x i16> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvslei_h(v16i16 _1) { return __lasx_xvslei_h(_1, 1); } + // CHECK-LABEL: @xvslei_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvslei.w(<8 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvslei.w(<8 x i32> _1, i32 1) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvslei_w(v8i32 _1) { return __lasx_xvslei_w(_1, 1); } + // CHECK-LABEL: @xvslei_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvslei.d(<4 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvslei.d(<4 x i64> _1, i32 1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvslei_d(v4i64 _1) { return __lasx_xvslei_d(_1, 1); } + // CHECK-LABEL: @xvsle_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsle.bu(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _124:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsle.bu(<32 x i8> _124, <32 x i8> _235) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvsle_bu(v32u8 _1, v32u8 _2) { return __lasx_xvsle_bu(_1, _2); } + // CHECK-LABEL: @xvsle_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsle.hu(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _124:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsle.hu(<16 x i16> _124, <16 x i16> _235) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvsle_hu(v16u16 _1, v16u16 _2) { return __lasx_xvsle_hu(_1, _2); } + // CHECK-LABEL: @xvsle_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> 
@llvm.loongarch.lasx.xvsle.wu(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _124:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsle.wu(<8 x i32> _124, <8 x i32> _235) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvsle_wu(v8u32 _1, v8u32 _2) { return __lasx_xvsle_wu(_1, _2); } + // CHECK-LABEL: @xvsle_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsle.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsle.du(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvsle_du(v4u64 _1, v4u64 _2) { return __lasx_xvsle_du(_1, _2); } + // CHECK-LABEL: @xvslei_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvslei.bu(<32 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvslei.bu(<32 x i8> _1, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvslei_bu(v32u8 _1) { return __lasx_xvslei_bu(_1, 1); } + // CHECK-LABEL: @xvslei_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvslei.hu(<16 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvslei.hu(<16 x i16> _1, i32 1) ++// CHECK-NEXT: store <16 x i16> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvslei_hu(v16u16 _1) { return __lasx_xvslei_hu(_1, 1); } + // CHECK-LABEL: @xvslei_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvslei.wu(<8 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvslei.wu(<8 x i32> _1, i32 1) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvslei_wu(v8u32 _1) { return __lasx_xvslei_wu(_1, 1); } + // CHECK-LABEL: @xvslei_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvslei.du(<4 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvslei.du(<4 x i64> _1, i32 1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvslei_du(v4u64 _1) { return __lasx_xvslei_du(_1, 1); } + // CHECK-LABEL: @xvsat_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsat.b(<32 x i8> _1:%.*, 
i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsat.b(<32 x i8> _1, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvsat_b(v32i8 _1) { return __lasx_xvsat_b(_1, 1); } + // CHECK-LABEL: @xvsat_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsat.h(<16 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsat.h(<16 x i16> _1, i32 1) ++// CHECK-NEXT: store <16 x i16> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvsat_h(v16i16 _1) { return __lasx_xvsat_h(_1, 1); } + // CHECK-LABEL: @xvsat_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsat.w(<8 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsat.w(<8 x i32> _1, i32 1) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvsat_w(v8i32 _1) { return __lasx_xvsat_w(_1, 1); } + // CHECK-LABEL: @xvsat_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsat.d(<4 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsat.d(<4 x i64> _1, i32 1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvsat_d(v4i64 _1) { return __lasx_xvsat_d(_1, 1); } + // CHECK-LABEL: @xvsat_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsat.bu(<32 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsat.bu(<32 x i8> _1, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32u8 xvsat_bu(v32u8 _1) { return __lasx_xvsat_bu(_1, 1); } + // CHECK-LABEL: @xvsat_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsat.hu(<16 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsat.hu(<16 x i16> _1, i32 1) ++// CHECK-NEXT: store <16 x i16> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16u16 xvsat_hu(v16u16 _1) { return __lasx_xvsat_hu(_1, 1); } + // CHECK-LABEL: @xvsat_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsat.wu(<8 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsat.wu(<8 x i32> _1, i32 1) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa 
TBAA2 ++// CHECK-NEXT: ret void + // + v8u32 xvsat_wu(v8u32 _1) { return __lasx_xvsat_wu(_1, 1); } + // CHECK-LABEL: @xvsat_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsat.du(<4 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsat.du(<4 x i64> _1, i32 1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4u64 xvsat_du(v4u64 _1) { return __lasx_xvsat_du(_1, 1); } + // CHECK-LABEL: @xvadda_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvadda.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _124:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvadda.b(<32 x i8> _124, <32 x i8> _235) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvadda_b(v32i8 _1, v32i8 _2) { return __lasx_xvadda_b(_1, _2); } + // CHECK-LABEL: @xvadda_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvadda.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _124:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvadda.h(<16 x i16> _124, <16 x i16> _235) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvadda_h(v16i16 _1, v16i16 _2) { return __lasx_xvadda_h(_1, _2); } + // CHECK-LABEL: @xvadda_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvadda.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _124:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvadda.w(<8 x i32> _124, <8 x i32> _235) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvadda_w(v8i32 _1, v8i32 _2) { return __lasx_xvadda_w(_1, _2); } + // CHECK-LABEL: @xvadda_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvadda.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvadda.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvadda_d(v4i64 _1, v4i64 _2) { return __lasx_xvadda_d(_1, _2); } + // CHECK-LABEL: @xvsadd_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsadd.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _124:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, 
!tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsadd.b(<32 x i8> _124, <32 x i8> _235) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvsadd_b(v32i8 _1, v32i8 _2) { return __lasx_xvsadd_b(_1, _2); } + // CHECK-LABEL: @xvsadd_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsadd.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _124:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsadd.h(<16 x i16> _124, <16 x i16> _235) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvsadd_h(v16i16 _1, v16i16 _2) { return __lasx_xvsadd_h(_1, _2); } + // CHECK-LABEL: @xvsadd_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsadd.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _124:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsadd.w(<8 x i32> _124, <8 x i32> _235) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvsadd_w(v8i32 _1, v8i32 _2) { return __lasx_xvsadd_w(_1, _2); } + // CHECK-LABEL: @xvsadd_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsadd.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsadd.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvsadd_d(v4i64 _1, v4i64 _2) { return __lasx_xvsadd_d(_1, _2); } + // CHECK-LABEL: @xvsadd_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsadd.bu(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _124:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsadd.bu(<32 x i8> _124, <32 x i8> _235) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32u8 xvsadd_bu(v32u8 _1, v32u8 _2) { return __lasx_xvsadd_bu(_1, _2); } + // CHECK-LABEL: @xvsadd_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsadd.hu(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _124:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsadd.hu(<16 x i16> _124, <16 x i16> _235) ++// CHECK-NEXT: store <16 x i16> TMP2, 
ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16u16 xvsadd_hu(v16u16 _1, v16u16 _2) { return __lasx_xvsadd_hu(_1, _2); } + // CHECK-LABEL: @xvsadd_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsadd.wu(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _124:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsadd.wu(<8 x i32> _124, <8 x i32> _235) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8u32 xvsadd_wu(v8u32 _1, v8u32 _2) { return __lasx_xvsadd_wu(_1, _2); } + // CHECK-LABEL: @xvsadd_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsadd.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsadd.du(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4u64 xvsadd_du(v4u64 _1, v4u64 _2) { return __lasx_xvsadd_du(_1, _2); } + // CHECK-LABEL: @xvavg_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvavg.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _124:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvavg.b(<32 x i8> _124, <32 x i8> _235) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvavg_b(v32i8 _1, v32i8 _2) { return __lasx_xvavg_b(_1, _2); } + // CHECK-LABEL: @xvavg_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvavg.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _124:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvavg.h(<16 x i16> _124, <16 x i16> _235) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvavg_h(v16i16 _1, v16i16 _2) { return __lasx_xvavg_h(_1, _2); } + // CHECK-LABEL: @xvavg_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvavg.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _124:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvavg.w(<8 x i32> _124, <8 x i32> _235) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvavg_w(v8i32 _1, v8i32 _2) { return __lasx_xvavg_w(_1, _2); } + // CHECK-LABEL: @xvavg_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> 
@llvm.loongarch.lasx.xvavg.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvavg.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvavg_d(v4i64 _1, v4i64 _2) { return __lasx_xvavg_d(_1, _2); } + // CHECK-LABEL: @xvavg_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvavg.bu(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _124:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvavg.bu(<32 x i8> _124, <32 x i8> _235) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32u8 xvavg_bu(v32u8 _1, v32u8 _2) { return __lasx_xvavg_bu(_1, _2); } + // CHECK-LABEL: @xvavg_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvavg.hu(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _124:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvavg.hu(<16 x i16> _124, <16 x i16> _235) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16u16 xvavg_hu(v16u16 _1, v16u16 _2) { return __lasx_xvavg_hu(_1, _2); } + // CHECK-LABEL: @xvavg_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvavg.wu(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _124:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvavg.wu(<8 x i32> _124, <8 x i32> _235) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8u32 xvavg_wu(v8u32 _1, v8u32 _2) { return __lasx_xvavg_wu(_1, _2); } + // CHECK-LABEL: @xvavg_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvavg.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvavg.du(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4u64 xvavg_du(v4u64 _1, v4u64 _2) { return __lasx_xvavg_du(_1, _2); } + // CHECK-LABEL: @xvavgr_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvavgr.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _124:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// 
CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvavgr.b(<32 x i8> _124, <32 x i8> _235) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvavgr_b(v32i8 _1, v32i8 _2) { return __lasx_xvavgr_b(_1, _2); } + // CHECK-LABEL: @xvavgr_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvavgr.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _124:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvavgr.h(<16 x i16> _124, <16 x i16> _235) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvavgr_h(v16i16 _1, v16i16 _2) { return __lasx_xvavgr_h(_1, _2); } + // CHECK-LABEL: @xvavgr_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvavgr.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _124:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvavgr.w(<8 x i32> _124, <8 x i32> _235) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvavgr_w(v8i32 _1, v8i32 _2) { return __lasx_xvavgr_w(_1, _2); } + // CHECK-LABEL: @xvavgr_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvavgr.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvavgr.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvavgr_d(v4i64 _1, v4i64 _2) { return __lasx_xvavgr_d(_1, _2); } + // CHECK-LABEL: @xvavgr_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvavgr.bu(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _124:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvavgr.bu(<32 x i8> _124, <32 x i8> _235) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32u8 xvavgr_bu(v32u8 _1, v32u8 _2) { return __lasx_xvavgr_bu(_1, _2); } + // CHECK-LABEL: @xvavgr_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvavgr.hu(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _124:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvavgr.hu(<16 x i16> _124, <16 x i16> _235) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16u16 
xvavgr_hu(v16u16 _1, v16u16 _2) { return __lasx_xvavgr_hu(_1, _2); } + // CHECK-LABEL: @xvavgr_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvavgr.wu(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _124:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvavgr.wu(<8 x i32> _124, <8 x i32> _235) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8u32 xvavgr_wu(v8u32 _1, v8u32 _2) { return __lasx_xvavgr_wu(_1, _2); } + // CHECK-LABEL: @xvavgr_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvavgr.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvavgr.du(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4u64 xvavgr_du(v4u64 _1, v4u64 _2) { return __lasx_xvavgr_du(_1, _2); } + // CHECK-LABEL: @xvssub_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssub.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _124:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssub.b(<32 x i8> _124, <32 x i8> _235) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvssub_b(v32i8 _1, v32i8 _2) { return __lasx_xvssub_b(_1, _2); } + // CHECK-LABEL: @xvssub_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssub.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _124:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssub.h(<16 x i16> _124, <16 x i16> _235) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvssub_h(v16i16 _1, v16i16 _2) { return __lasx_xvssub_h(_1, _2); } + // CHECK-LABEL: @xvssub_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssub.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _124:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssub.w(<8 x i32> _124, <8 x i32> _235) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvssub_w(v8i32 _1, v8i32 _2) { return __lasx_xvssub_w(_1, _2); } + // CHECK-LABEL: @xvssub_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvssub.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// 
CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvssub.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvssub_d(v4i64 _1, v4i64 _2) { return __lasx_xvssub_d(_1, _2); } + // CHECK-LABEL: @xvssub_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssub.bu(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _124:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssub.bu(<32 x i8> _124, <32 x i8> _235) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32u8 xvssub_bu(v32u8 _1, v32u8 _2) { return __lasx_xvssub_bu(_1, _2); } + // CHECK-LABEL: @xvssub_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssub.hu(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _124:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssub.hu(<16 x i16> _124, <16 x i16> _235) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16u16 xvssub_hu(v16u16 _1, v16u16 _2) { return __lasx_xvssub_hu(_1, _2); } + // CHECK-LABEL: @xvssub_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssub.wu(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _124:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssub.wu(<8 x i32> _124, <8 x i32> _235) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8u32 xvssub_wu(v8u32 _1, v8u32 _2) { return __lasx_xvssub_wu(_1, _2); } + // CHECK-LABEL: @xvssub_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvssub.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvssub.du(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4u64 xvssub_du(v4u64 _1, v4u64 _2) { return __lasx_xvssub_du(_1, _2); } + // CHECK-LABEL: @xvabsd_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvabsd.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _124:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> 
@llvm.loongarch.lasx.xvabsd.b(<32 x i8> _124, <32 x i8> _235) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvabsd_b(v32i8 _1, v32i8 _2) { return __lasx_xvabsd_b(_1, _2); } + // CHECK-LABEL: @xvabsd_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvabsd.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _124:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvabsd.h(<16 x i16> _124, <16 x i16> _235) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvabsd_h(v16i16 _1, v16i16 _2) { return __lasx_xvabsd_h(_1, _2); } + // CHECK-LABEL: @xvabsd_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvabsd.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _124:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvabsd.w(<8 x i32> _124, <8 x i32> _235) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvabsd_w(v8i32 _1, v8i32 _2) { return __lasx_xvabsd_w(_1, _2); } + // CHECK-LABEL: @xvabsd_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvabsd.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvabsd.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvabsd_d(v4i64 _1, v4i64 _2) { return __lasx_xvabsd_d(_1, _2); } + // CHECK-LABEL: @xvabsd_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvabsd.bu(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _124:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvabsd.bu(<32 x i8> _124, <32 x i8> _235) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32u8 xvabsd_bu(v32u8 _1, v32u8 _2) { return __lasx_xvabsd_bu(_1, _2); } + // CHECK-LABEL: @xvabsd_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvabsd.hu(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _124:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvabsd.hu(<16 x i16> _124, <16 x i16> _235) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16u16 xvabsd_hu(v16u16 _1, v16u16 _2) { return 
__lasx_xvabsd_hu(_1, _2); } + // CHECK-LABEL: @xvabsd_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvabsd.wu(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _124:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvabsd.wu(<8 x i32> _124, <8 x i32> _235) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8u32 xvabsd_wu(v8u32 _1, v8u32 _2) { return __lasx_xvabsd_wu(_1, _2); } + // CHECK-LABEL: @xvabsd_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvabsd.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvabsd.du(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4u64 xvabsd_du(v4u64 _1, v4u64 _2) { return __lasx_xvabsd_du(_1, _2); } + // CHECK-LABEL: @xvmul_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmul.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _124:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmul.b(<32 x i8> _124, <32 x i8> _235) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvmul_b(v32i8 _1, v32i8 _2) { return __lasx_xvmul_b(_1, _2); } + // CHECK-LABEL: @xvmul_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmul.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _124:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmul.h(<16 x i16> _124, <16 x i16> _235) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvmul_h(v16i16 _1, v16i16 _2) { return __lasx_xvmul_h(_1, _2); } + // CHECK-LABEL: @xvmul_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmul.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _124:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmul.w(<8 x i32> _124, <8 x i32> _235) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvmul_w(v8i32 _1, v8i32 _2) { return __lasx_xvmul_w(_1, _2); } + // CHECK-LABEL: @xvmul_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmul.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x 
i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmul.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvmul_d(v4i64 _1, v4i64 _2) { return __lasx_xvmul_d(_1, _2); } + // CHECK-LABEL: @xvmadd_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmadd.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*, <32 x i8> _3:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _136:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _247:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _358:%.* = load <32 x i8>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmadd.b(<32 x i8> _136, <32 x i8> _247, <32 x i8> _358) ++// CHECK-NEXT: store <32 x i8> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvmadd_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __lasx_xvmadd_b(_1, _2, _3); } + // CHECK-LABEL: @xvmadd_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmadd.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*, <16 x i16> _3:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _136:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _247:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _358:%.* = load <16 x i16>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmadd.h(<16 x i16> _136, <16 x i16> _247, <16 x i16> _358) ++// CHECK-NEXT: store <16 x i16> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvmadd_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __lasx_xvmadd_h(_1, _2, _3); } + // CHECK-LABEL: @xvmadd_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmadd.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*, <8 x i32> _3:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _136:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _247:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _358:%.* = load <8 x i32>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmadd.w(<8 x i32> _136, <8 x i32> _247, <8 x i32> _358) ++// CHECK-NEXT: store <8 x i32> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvmadd_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __lasx_xvmadd_w(_1, _2, _3); } + // CHECK-LABEL: @xvmadd_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmadd.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*, <4 x i64> _3:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _3:%.* = load <4 x i64>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmadd.d(<4 x i64> _1, <4 x i64> _2, <4 x i64> _3) ++// CHECK-NEXT: store <4 x i64> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvmadd_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __lasx_xvmadd_d(_1, _2, _3); 
} + // CHECK-LABEL: @xvmsub_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmsub.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*, <32 x i8> _3:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _136:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _247:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _358:%.* = load <32 x i8>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmsub.b(<32 x i8> _136, <32 x i8> _247, <32 x i8> _358) ++// CHECK-NEXT: store <32 x i8> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvmsub_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __lasx_xvmsub_b(_1, _2, _3); } + // CHECK-LABEL: @xvmsub_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmsub.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*, <16 x i16> _3:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _136:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _247:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _358:%.* = load <16 x i16>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmsub.h(<16 x i16> _136, <16 x i16> _247, <16 x i16> _358) ++// CHECK-NEXT: store <16 x i16> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvmsub_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __lasx_xvmsub_h(_1, _2, _3); } + // CHECK-LABEL: @xvmsub_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmsub.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*, <8 x i32> _3:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _136:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _247:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _358:%.* = load <8 x i32>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmsub.w(<8 x i32> _136, <8 x i32> _247, <8 x i32> _358) ++// CHECK-NEXT: store <8 x i32> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvmsub_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __lasx_xvmsub_w(_1, _2, _3); } + // CHECK-LABEL: @xvmsub_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmsub.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*, <4 x i64> _3:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _3:%.* = load <4 x i64>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmsub.d(<4 x i64> _1, <4 x i64> _2, <4 x i64> _3) ++// CHECK-NEXT: store <4 x i64> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvmsub_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __lasx_xvmsub_d(_1, _2, _3); } + // CHECK-LABEL: @xvdiv_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvdiv.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _124:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 
x i8> @llvm.loongarch.lasx.xvdiv.b(<32 x i8> _124, <32 x i8> _235) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvdiv_b(v32i8 _1, v32i8 _2) { return __lasx_xvdiv_b(_1, _2); } + // CHECK-LABEL: @xvdiv_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvdiv.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _124:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvdiv.h(<16 x i16> _124, <16 x i16> _235) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvdiv_h(v16i16 _1, v16i16 _2) { return __lasx_xvdiv_h(_1, _2); } + // CHECK-LABEL: @xvdiv_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvdiv.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _124:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvdiv.w(<8 x i32> _124, <8 x i32> _235) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvdiv_w(v8i32 _1, v8i32 _2) { return __lasx_xvdiv_w(_1, _2); } + // CHECK-LABEL: @xvdiv_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvdiv.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvdiv.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvdiv_d(v4i64 _1, v4i64 _2) { return __lasx_xvdiv_d(_1, _2); } + // CHECK-LABEL: @xvdiv_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvdiv.bu(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _124:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvdiv.bu(<32 x i8> _124, <32 x i8> _235) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32u8 xvdiv_bu(v32u8 _1, v32u8 _2) { return __lasx_xvdiv_bu(_1, _2); } + // CHECK-LABEL: @xvdiv_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvdiv.hu(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _124:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvdiv.hu(<16 x i16> _124, <16 x i16> _235) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16u16 xvdiv_hu(v16u16 _1, v16u16 _2) { return __lasx_xvdiv_hu(_1, _2); } + // 
CHECK-LABEL: @xvdiv_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvdiv.wu(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _124:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvdiv.wu(<8 x i32> _124, <8 x i32> _235) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8u32 xvdiv_wu(v8u32 _1, v8u32 _2) { return __lasx_xvdiv_wu(_1, _2); } + // CHECK-LABEL: @xvdiv_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvdiv.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvdiv.du(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4u64 xvdiv_du(v4u64 _1, v4u64 _2) { return __lasx_xvdiv_du(_1, _2); } + // CHECK-LABEL: @xvhaddw_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvhaddw.h.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _124:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvhaddw.h.b(<32 x i8> _124, <32 x i8> _235) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvhaddw_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvhaddw_h_b(_1, _2); } + // CHECK-LABEL: @xvhaddw_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvhaddw.w.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _124:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvhaddw.w.h(<16 x i16> _124, <16 x i16> _235) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvhaddw_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvhaddw_w_h(_1, _2); } + // CHECK-LABEL: @xvhaddw_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.d.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _124:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.d.w(<8 x i32> _124, <8 x i32> _235) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvhaddw_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvhaddw_d_w(_1, _2); } + // CHECK-LABEL: @xvhaddw_hu_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvhaddw.hu.bu(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// 
CHECK-NEXT: _124:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvhaddw.hu.bu(<32 x i8> _124, <32 x i8> _235) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16u16 xvhaddw_hu_bu(v32u8 _1, v32u8 _2) { return __lasx_xvhaddw_hu_bu(_1, _2); } + // CHECK-LABEL: @xvhaddw_wu_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvhaddw.wu.hu(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _124:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvhaddw.wu.hu(<16 x i16> _124, <16 x i16> _235) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8u32 xvhaddw_wu_hu(v16u16 _1, v16u16 _2) { return __lasx_xvhaddw_wu_hu(_1, _2); } + // CHECK-LABEL: @xvhaddw_du_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.du.wu(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _124:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.du.wu(<8 x i32> _124, <8 x i32> _235) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4u64 xvhaddw_du_wu(v8u32 _1, v8u32 _2) { return __lasx_xvhaddw_du_wu(_1, _2); } + // CHECK-LABEL: @xvhsubw_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvhsubw.h.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _124:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvhsubw.h.b(<32 x i8> _124, <32 x i8> _235) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvhsubw_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvhsubw_h_b(_1, _2); } + // CHECK-LABEL: @xvhsubw_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvhsubw.w.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _124:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvhsubw.w.h(<16 x i16> _124, <16 x i16> _235) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvhsubw_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvhsubw_w_h(_1, _2); } + // CHECK-LABEL: @xvhsubw_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.d.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _124:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <8 x i32>, ptr TMP1:%.*, align 
32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.d.w(<8 x i32> _124, <8 x i32> _235) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvhsubw_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvhsubw_d_w(_1, _2); } + // CHECK-LABEL: @xvhsubw_hu_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvhsubw.hu.bu(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _124:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvhsubw.hu.bu(<32 x i8> _124, <32 x i8> _235) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvhsubw_hu_bu(v32u8 _1, v32u8 _2) { return __lasx_xvhsubw_hu_bu(_1, _2); } + // CHECK-LABEL: @xvhsubw_wu_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvhsubw.wu.hu(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _124:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvhsubw.wu.hu(<16 x i16> _124, <16 x i16> _235) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvhsubw_wu_hu(v16u16 _1, v16u16 _2) { return __lasx_xvhsubw_wu_hu(_1, _2); } + // CHECK-LABEL: @xvhsubw_du_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.du.wu(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _124:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.du.wu(<8 x i32> _124, <8 x i32> _235) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvhsubw_du_wu(v8u32 _1, v8u32 _2) { return __lasx_xvhsubw_du_wu(_1, _2); } + // CHECK-LABEL: @xvmod_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmod.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _124:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmod.b(<32 x i8> _124, <32 x i8> _235) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvmod_b(v32i8 _1, v32i8 _2) { return __lasx_xvmod_b(_1, _2); } + // CHECK-LABEL: @xvmod_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmod.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _124:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmod.h(<16 x i16> _124, <16 x i16> _235) ++// CHECK-NEXT: store <16 x i16> TMP2, 
ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvmod_h(v16i16 _1, v16i16 _2) { return __lasx_xvmod_h(_1, _2); } + // CHECK-LABEL: @xvmod_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmod.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _124:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmod.w(<8 x i32> _124, <8 x i32> _235) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvmod_w(v8i32 _1, v8i32 _2) { return __lasx_xvmod_w(_1, _2); } + // CHECK-LABEL: @xvmod_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmod.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmod.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvmod_d(v4i64 _1, v4i64 _2) { return __lasx_xvmod_d(_1, _2); } + // CHECK-LABEL: @xvmod_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmod.bu(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _124:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmod.bu(<32 x i8> _124, <32 x i8> _235) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32u8 xvmod_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmod_bu(_1, _2); } + // CHECK-LABEL: @xvmod_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmod.hu(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _124:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmod.hu(<16 x i16> _124, <16 x i16> _235) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16u16 xvmod_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmod_hu(_1, _2); } + // CHECK-LABEL: @xvmod_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmod.wu(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _124:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmod.wu(<8 x i32> _124, <8 x i32> _235) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8u32 xvmod_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmod_wu(_1, _2); } + // CHECK-LABEL: @xvmod_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> 
@llvm.loongarch.lasx.xvmod.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmod.du(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4u64 xvmod_du(v4u64 _1, v4u64 _2) { return __lasx_xvmod_du(_1, _2); } + // CHECK-LABEL: @xvrepl128vei_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b(<32 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b(<32 x i8> _1, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvrepl128vei_b(v32i8 _1) { return __lasx_xvrepl128vei_b(_1, 1); } + // CHECK-LABEL: @xvrepl128vei_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h(<16 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h(<16 x i16> _1, i32 1) ++// CHECK-NEXT: store <16 x i16> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvrepl128vei_h(v16i16 _1) { return __lasx_xvrepl128vei_h(_1, 1); } + // CHECK-LABEL: @xvrepl128vei_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w(<8 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w(<8 x i32> _1, i32 1) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvrepl128vei_w(v8i32 _1) { return __lasx_xvrepl128vei_w(_1, 1); } + // CHECK-LABEL: @xvrepl128vei_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d(<4 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d(<4 x i64> _1, i32 1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvrepl128vei_d(v4i64 _1) { return __lasx_xvrepl128vei_d(_1, 1); } + // CHECK-LABEL: @xvpickev_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvpickev.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _124:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvpickev.b(<32 x i8> _124, <32 x i8> _235) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvpickev_b(v32i8 _1, v32i8 _2) { return __lasx_xvpickev_b(_1, _2); } + // CHECK-LABEL: @xvpickev_h( + // 
CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvpickev.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _124:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvpickev.h(<16 x i16> _124, <16 x i16> _235) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvpickev_h(v16i16 _1, v16i16 _2) { return __lasx_xvpickev_h(_1, _2); } + // CHECK-LABEL: @xvpickev_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvpickev.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _124:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvpickev.w(<8 x i32> _124, <8 x i32> _235) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvpickev_w(v8i32 _1, v8i32 _2) { return __lasx_xvpickev_w(_1, _2); } + // CHECK-LABEL: @xvpickev_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvpickev.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvpickev.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvpickev_d(v4i64 _1, v4i64 _2) { return __lasx_xvpickev_d(_1, _2); } + // CHECK-LABEL: @xvpickod_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvpickod.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _124:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvpickod.b(<32 x i8> _124, <32 x i8> _235) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvpickod_b(v32i8 _1, v32i8 _2) { return __lasx_xvpickod_b(_1, _2); } + // CHECK-LABEL: @xvpickod_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvpickod.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _124:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvpickod.h(<16 x i16> _124, <16 x i16> _235) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvpickod_h(v16i16 _1, v16i16 _2) { return __lasx_xvpickod_h(_1, _2); } + // CHECK-LABEL: @xvpickod_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvpickod.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _124:%.* 
= load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvpickod.w(<8 x i32> _124, <8 x i32> _235) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvpickod_w(v8i32 _1, v8i32 _2) { return __lasx_xvpickod_w(_1, _2); } + // CHECK-LABEL: @xvpickod_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvpickod.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvpickod.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvpickod_d(v4i64 _1, v4i64 _2) { return __lasx_xvpickod_d(_1, _2); } + // CHECK-LABEL: @xvilvh_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvilvh.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _124:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvilvh.b(<32 x i8> _124, <32 x i8> _235) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvilvh_b(v32i8 _1, v32i8 _2) { return __lasx_xvilvh_b(_1, _2); } + // CHECK-LABEL: @xvilvh_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvilvh.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _124:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvilvh.h(<16 x i16> _124, <16 x i16> _235) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvilvh_h(v16i16 _1, v16i16 _2) { return __lasx_xvilvh_h(_1, _2); } + // CHECK-LABEL: @xvilvh_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvilvh.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _124:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvilvh.w(<8 x i32> _124, <8 x i32> _235) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvilvh_w(v8i32 _1, v8i32 _2) { return __lasx_xvilvh_w(_1, _2); } + // CHECK-LABEL: @xvilvh_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvilvh.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvilvh.d(<4 x i64> _1, <4 x i64> _2) ++// 
CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvilvh_d(v4i64 _1, v4i64 _2) { return __lasx_xvilvh_d(_1, _2); } + // CHECK-LABEL: @xvilvl_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvilvl.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _124:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvilvl.b(<32 x i8> _124, <32 x i8> _235) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvilvl_b(v32i8 _1, v32i8 _2) { return __lasx_xvilvl_b(_1, _2); } + // CHECK-LABEL: @xvilvl_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvilvl.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _124:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvilvl.h(<16 x i16> _124, <16 x i16> _235) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvilvl_h(v16i16 _1, v16i16 _2) { return __lasx_xvilvl_h(_1, _2); } + // CHECK-LABEL: @xvilvl_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvilvl.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _124:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvilvl.w(<8 x i32> _124, <8 x i32> _235) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvilvl_w(v8i32 _1, v8i32 _2) { return __lasx_xvilvl_w(_1, _2); } + // CHECK-LABEL: @xvilvl_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvilvl.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvilvl.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvilvl_d(v4i64 _1, v4i64 _2) { return __lasx_xvilvl_d(_1, _2); } + // CHECK-LABEL: @xvpackev_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvpackev.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _124:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvpackev.b(<32 x i8> _124, <32 x i8> _235) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvpackev_b(v32i8 _1, v32i8 _2) { return __lasx_xvpackev_b(_1, _2); } + // CHECK-LABEL: @xvpackev_h( + // CHECK-NEXT: entry: +-// 
CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvpackev.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _124:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvpackev.h(<16 x i16> _124, <16 x i16> _235) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvpackev_h(v16i16 _1, v16i16 _2) { return __lasx_xvpackev_h(_1, _2); } + // CHECK-LABEL: @xvpackev_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvpackev.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _124:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvpackev.w(<8 x i32> _124, <8 x i32> _235) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvpackev_w(v8i32 _1, v8i32 _2) { return __lasx_xvpackev_w(_1, _2); } + // CHECK-LABEL: @xvpackev_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvpackev.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvpackev.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvpackev_d(v4i64 _1, v4i64 _2) { return __lasx_xvpackev_d(_1, _2); } + // CHECK-LABEL: @xvpackod_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvpackod.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _124:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvpackod.b(<32 x i8> _124, <32 x i8> _235) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvpackod_b(v32i8 _1, v32i8 _2) { return __lasx_xvpackod_b(_1, _2); } + // CHECK-LABEL: @xvpackod_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvpackod.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _124:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvpackod.h(<16 x i16> _124, <16 x i16> _235) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvpackod_h(v16i16 _1, v16i16 _2) { return __lasx_xvpackod_h(_1, _2); } + // CHECK-LABEL: @xvpackod_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvpackod.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _124:%.* = load <8 x i32>, ptr 
TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvpackod.w(<8 x i32> _124, <8 x i32> _235) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvpackod_w(v8i32 _1, v8i32 _2) { return __lasx_xvpackod_w(_1, _2); } + // CHECK-LABEL: @xvpackod_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvpackod.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvpackod.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvpackod_d(v4i64 _1, v4i64 _2) { return __lasx_xvpackod_d(_1, _2); } + // CHECK-LABEL: @xvshuf_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvshuf.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*, <32 x i8> _3:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _136:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _247:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _358:%.* = load <32 x i8>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvshuf.b(<32 x i8> _136, <32 x i8> _247, <32 x i8> _358) ++// CHECK-NEXT: store <32 x i8> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvshuf_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __lasx_xvshuf_b(_1, _2, _3); } + // CHECK-LABEL: @xvshuf_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvshuf.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*, <16 x i16> _3:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _136:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _247:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _358:%.* = load <16 x i16>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvshuf.h(<16 x i16> _136, <16 x i16> _247, <16 x i16> _358) ++// CHECK-NEXT: store <16 x i16> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvshuf_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __lasx_xvshuf_h(_1, _2, _3); } + // CHECK-LABEL: @xvshuf_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvshuf.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*, <8 x i32> _3:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _136:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _247:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _358:%.* = load <8 x i32>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvshuf.w(<8 x i32> _136, <8 x i32> _247, <8 x i32> _358) ++// CHECK-NEXT: store <8 x i32> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvshuf_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __lasx_xvshuf_w(_1, _2, _3); } + // CHECK-LABEL: @xvshuf_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> 
@llvm.loongarch.lasx.xvshuf.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*, <4 x i64> _3:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _3:%.* = load <4 x i64>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvshuf.d(<4 x i64> _1, <4 x i64> _2, <4 x i64> _3) ++// CHECK-NEXT: store <4 x i64> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvshuf_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __lasx_xvshuf_d(_1, _2, _3); } + // CHECK-LABEL: @xvand_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvand.v(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _124:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvand.v(<32 x i8> _124, <32 x i8> _235) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32u8 xvand_v(v32u8 _1, v32u8 _2) { return __lasx_xvand_v(_1, _2); } + // CHECK-LABEL: @xvandi_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvandi.b(<32 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvandi.b(<32 x i8> _1, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32u8 xvandi_b(v32u8 _1) { return __lasx_xvandi_b(_1, 1); } + // CHECK-LABEL: @xvor_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvor.v(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _124:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvor.v(<32 x i8> _124, <32 x i8> _235) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32u8 xvor_v(v32u8 _1, v32u8 _2) { return __lasx_xvor_v(_1, _2); } + // CHECK-LABEL: @xvori_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvori.b(<32 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvori.b(<32 x i8> _1, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32u8 xvori_b(v32u8 _1) { return __lasx_xvori_b(_1, 1); } + // CHECK-LABEL: @xvnor_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvnor.v(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _124:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvnor.v(<32 x i8> _124, <32 x i8> _235) ++// CHECK-NEXT: store <32 x 
i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32u8 xvnor_v(v32u8 _1, v32u8 _2) { return __lasx_xvnor_v(_1, _2); } + // CHECK-LABEL: @xvnori_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvnori.b(<32 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvnori.b(<32 x i8> _1, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32u8 xvnori_b(v32u8 _1) { return __lasx_xvnori_b(_1, 1); } + // CHECK-LABEL: @xvxor_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvxor.v(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _124:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvxor.v(<32 x i8> _124, <32 x i8> _235) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32u8 xvxor_v(v32u8 _1, v32u8 _2) { return __lasx_xvxor_v(_1, _2); } + // CHECK-LABEL: @xvxori_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvxori.b(<32 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvxori.b(<32 x i8> _1, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32u8 xvxori_b(v32u8 _1) { return __lasx_xvxori_b(_1, 1); } + // CHECK-LABEL: @xvbitsel_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvbitsel.v(<32 x i8> _1:%.*, <32 x i8> _2:%.*, <32 x i8> _3:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _136:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _247:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _358:%.* = load <32 x i8>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvbitsel.v(<32 x i8> _136, <32 x i8> _247, <32 x i8> _358) ++// CHECK-NEXT: store <32 x i8> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32u8 xvbitsel_v(v32u8 _1, v32u8 _2, v32u8 _3) { return __lasx_xvbitsel_v(_1, _2, _3); } + // CHECK-LABEL: @xvbitseli_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvbitseli.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvbitseli.b(<32 x i8> _1, <32 x i8> _2, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32u8 xvbitseli_b(v32u8 _1, v32u8 _2) { return __lasx_xvbitseli_b(_1, _2, 1); } + // CHECK-LABEL: @xvshuf4i_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b(<32 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret 
<32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b(<32 x i8> _1, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvshuf4i_b(v32i8 _1) { return __lasx_xvshuf4i_b(_1, 1); } + // CHECK-LABEL: @xvshuf4i_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h(<16 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h(<16 x i16> _1, i32 1) ++// CHECK-NEXT: store <16 x i16> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvshuf4i_h(v16i16 _1) { return __lasx_xvshuf4i_h(_1, 1); } + // CHECK-LABEL: @xvshuf4i_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w(<8 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w(<8 x i32> _1, i32 1) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvshuf4i_w(v8i32 _1) { return __lasx_xvshuf4i_w(_1, 1); } + // CHECK-LABEL: @xvreplgr2vr_b( + // CHECK-NEXT: entry: + // CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvreplgr2vr.b(i32 _1:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: store <32 x i8> TMP0, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvreplgr2vr_b(int _1) { return __lasx_xvreplgr2vr_b(_1); } + // CHECK-LABEL: @xvreplgr2vr_h( + // CHECK-NEXT: entry: + // CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvreplgr2vr.h(i32 _1:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: store <16 x i16> TMP0, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvreplgr2vr_h(int _1) { return __lasx_xvreplgr2vr_h(_1); } + // CHECK-LABEL: @xvreplgr2vr_w( + // CHECK-NEXT: entry: + // CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvreplgr2vr.w(i32 _1:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: store <8 x i32> TMP0, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvreplgr2vr_w(int _1) { return __lasx_xvreplgr2vr_w(_1); } + // CHECK-LABEL: @xvreplgr2vr_d( + // CHECK-NEXT: entry: + // CHECK-NEXT: CONV:%.* = sext i32 _1:%.* to i64 + // CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvreplgr2vr.d(i64 CONV) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: store <4 x i64> TMP0, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvreplgr2vr_d(int _1) { return __lasx_xvreplgr2vr_d(_1); } + // CHECK-LABEL: @xvpcnt_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvpcnt.b(<32 x i8> _1:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _112:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvpcnt.b(<32 x i8> _112) ++// CHECK-NEXT: store <32 x i8> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvpcnt_b(v32i8 _1) { return __lasx_xvpcnt_b(_1); } + // 
CHECK-LABEL: @xvpcnt_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvpcnt.h(<16 x i16> _1:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _112:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvpcnt.h(<16 x i16> _112) ++// CHECK-NEXT: store <16 x i16> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvpcnt_h(v16i16 _1) { return __lasx_xvpcnt_h(_1); } + // CHECK-LABEL: @xvpcnt_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvpcnt.w(<8 x i32> _1:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _112:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvpcnt.w(<8 x i32> _112) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvpcnt_w(v8i32 _1) { return __lasx_xvpcnt_w(_1); } + // CHECK-LABEL: @xvpcnt_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvpcnt.d(<4 x i64> _1:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvpcnt.d(<4 x i64> _1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvpcnt_d(v4i64 _1) { return __lasx_xvpcnt_d(_1); } + // CHECK-LABEL: @xvclo_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvclo.b(<32 x i8> _1:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _112:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvclo.b(<32 x i8> _112) ++// CHECK-NEXT: store <32 x i8> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvclo_b(v32i8 _1) { return __lasx_xvclo_b(_1); } + // CHECK-LABEL: @xvclo_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvclo.h(<16 x i16> _1:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _112:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvclo.h(<16 x i16> _112) ++// CHECK-NEXT: store <16 x i16> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvclo_h(v16i16 _1) { return __lasx_xvclo_h(_1); } + // CHECK-LABEL: @xvclo_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvclo.w(<8 x i32> _1:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _112:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvclo.w(<8 x i32> _112) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvclo_w(v8i32 _1) { return __lasx_xvclo_w(_1); } + // CHECK-LABEL: @xvclo_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvclo.d(<4 x i64> _1:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvclo.d(<4 x i64> _1) ++// CHECK-NEXT: store <4 x 
i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvclo_d(v4i64 _1) { return __lasx_xvclo_d(_1); } + // CHECK-LABEL: @xvclz_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvclz.b(<32 x i8> _1:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _112:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvclz.b(<32 x i8> _112) ++// CHECK-NEXT: store <32 x i8> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvclz_b(v32i8 _1) { return __lasx_xvclz_b(_1); } + // CHECK-LABEL: @xvclz_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvclz.h(<16 x i16> _1:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _112:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvclz.h(<16 x i16> _112) ++// CHECK-NEXT: store <16 x i16> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvclz_h(v16i16 _1) { return __lasx_xvclz_h(_1); } + // CHECK-LABEL: @xvclz_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvclz.w(<8 x i32> _1:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _112:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvclz.w(<8 x i32> _112) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvclz_w(v8i32 _1) { return __lasx_xvclz_w(_1); } + // CHECK-LABEL: @xvclz_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvclz.d(<4 x i64> _1:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvclz.d(<4 x i64> _1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvclz_d(v4i64 _1) { return __lasx_xvclz_d(_1); } + // CHECK-LABEL: @xvfadd_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfadd.s(<8 x float> _1:%.*, <8 x float> _2:%.*) +-// CHECK-NEXT: ret <8 x float> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x float>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfadd.s(<8 x float> _1, <8 x float> _2) ++// CHECK-NEXT: store <8 x float> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8f32 xvfadd_s(v8f32 _1, v8f32 _2) { return __lasx_xvfadd_s(_1, _2); } + // CHECK-LABEL: @xvfadd_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfadd.d(<4 x double> _1:%.*, <4 x double> _2:%.*) +-// CHECK-NEXT: ret <4 x double> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x double>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfadd.d(<4 x double> _1, <4 x double> _2) ++// CHECK-NEXT: store <4 x double> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4f64 xvfadd_d(v4f64 _1, 
v4f64 _2) { return __lasx_xvfadd_d(_1, _2); } + // CHECK-LABEL: @xvfsub_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfsub.s(<8 x float> _1:%.*, <8 x float> _2:%.*) +-// CHECK-NEXT: ret <8 x float> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x float>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfsub.s(<8 x float> _1, <8 x float> _2) ++// CHECK-NEXT: store <8 x float> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8f32 xvfsub_s(v8f32 _1, v8f32 _2) { return __lasx_xvfsub_s(_1, _2); } + // CHECK-LABEL: @xvfsub_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfsub.d(<4 x double> _1:%.*, <4 x double> _2:%.*) +-// CHECK-NEXT: ret <4 x double> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x double>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfsub.d(<4 x double> _1, <4 x double> _2) ++// CHECK-NEXT: store <4 x double> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4f64 xvfsub_d(v4f64 _1, v4f64 _2) { return __lasx_xvfsub_d(_1, _2); } + // CHECK-LABEL: @xvfmul_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfmul.s(<8 x float> _1:%.*, <8 x float> _2:%.*) +-// CHECK-NEXT: ret <8 x float> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x float>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfmul.s(<8 x float> _1, <8 x float> _2) ++// CHECK-NEXT: store <8 x float> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8f32 xvfmul_s(v8f32 _1, v8f32 _2) { return __lasx_xvfmul_s(_1, _2); } + // CHECK-LABEL: @xvfmul_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfmul.d(<4 x double> _1:%.*, <4 x double> _2:%.*) +-// CHECK-NEXT: ret <4 x double> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x double>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfmul.d(<4 x double> _1, <4 x double> _2) ++// CHECK-NEXT: store <4 x double> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4f64 xvfmul_d(v4f64 _1, v4f64 _2) { return __lasx_xvfmul_d(_1, _2); } + // CHECK-LABEL: @xvfdiv_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfdiv.s(<8 x float> _1:%.*, <8 x float> _2:%.*) +-// CHECK-NEXT: ret <8 x float> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x float>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfdiv.s(<8 x float> _1, <8 x float> _2) ++// CHECK-NEXT: store <8 x float> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8f32 xvfdiv_s(v8f32 _1, v8f32 _2) { return __lasx_xvfdiv_s(_1, _2); } + // CHECK-LABEL: @xvfdiv_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfdiv.d(<4 x 
double> _1:%.*, <4 x double> _2:%.*) +-// CHECK-NEXT: ret <4 x double> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x double>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfdiv.d(<4 x double> _1, <4 x double> _2) ++// CHECK-NEXT: store <4 x double> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4f64 xvfdiv_d(v4f64 _1, v4f64 _2) { return __lasx_xvfdiv_d(_1, _2); } + // CHECK-LABEL: @xvfcvt_h_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvfcvt.h.s(<8 x float> _1:%.*, <8 x float> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x float>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvfcvt.h.s(<8 x float> _1, <8 x float> _2) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvfcvt_h_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcvt_h_s(_1, _2); } + // CHECK-LABEL: @xvfcvt_s_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfcvt.s.d(<4 x double> _1:%.*, <4 x double> _2:%.*) +-// CHECK-NEXT: ret <8 x float> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x double>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfcvt.s.d(<4 x double> _1, <4 x double> _2) ++// CHECK-NEXT: store <8 x float> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8f32 xvfcvt_s_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcvt_s_d(_1, _2); } + // CHECK-LABEL: @xvfmin_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfmin.s(<8 x float> _1:%.*, <8 x float> _2:%.*) +-// CHECK-NEXT: ret <8 x float> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x float>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfmin.s(<8 x float> _1, <8 x float> _2) ++// CHECK-NEXT: store <8 x float> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8f32 xvfmin_s(v8f32 _1, v8f32 _2) { return __lasx_xvfmin_s(_1, _2); } + // CHECK-LABEL: @xvfmin_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfmin.d(<4 x double> _1:%.*, <4 x double> _2:%.*) +-// CHECK-NEXT: ret <4 x double> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x double>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfmin.d(<4 x double> _1, <4 x double> _2) ++// CHECK-NEXT: store <4 x double> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4f64 xvfmin_d(v4f64 _1, v4f64 _2) { return __lasx_xvfmin_d(_1, _2); } + // CHECK-LABEL: @xvfmina_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfmina.s(<8 x float> _1:%.*, <8 x float> _2:%.*) +-// CHECK-NEXT: ret <8 x float> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// 
CHECK-NEXT: _2:%.* = load <8 x float>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfmina.s(<8 x float> _1, <8 x float> _2) ++// CHECK-NEXT: store <8 x float> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8f32 xvfmina_s(v8f32 _1, v8f32 _2) { return __lasx_xvfmina_s(_1, _2); } + // CHECK-LABEL: @xvfmina_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfmina.d(<4 x double> _1:%.*, <4 x double> _2:%.*) +-// CHECK-NEXT: ret <4 x double> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x double>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfmina.d(<4 x double> _1, <4 x double> _2) ++// CHECK-NEXT: store <4 x double> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4f64 xvfmina_d(v4f64 _1, v4f64 _2) { return __lasx_xvfmina_d(_1, _2); } + // CHECK-LABEL: @xvfmax_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfmax.s(<8 x float> _1:%.*, <8 x float> _2:%.*) +-// CHECK-NEXT: ret <8 x float> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x float>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfmax.s(<8 x float> _1, <8 x float> _2) ++// CHECK-NEXT: store <8 x float> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8f32 xvfmax_s(v8f32 _1, v8f32 _2) { return __lasx_xvfmax_s(_1, _2); } + // CHECK-LABEL: @xvfmax_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfmax.d(<4 x double> _1:%.*, <4 x double> _2:%.*) +-// CHECK-NEXT: ret <4 x double> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x double>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfmax.d(<4 x double> _1, <4 x double> _2) ++// CHECK-NEXT: store <4 x double> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4f64 xvfmax_d(v4f64 _1, v4f64 _2) { return __lasx_xvfmax_d(_1, _2); } + // CHECK-LABEL: @xvfmaxa_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfmaxa.s(<8 x float> _1:%.*, <8 x float> _2:%.*) +-// CHECK-NEXT: ret <8 x float> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x float>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfmaxa.s(<8 x float> _1, <8 x float> _2) ++// CHECK-NEXT: store <8 x float> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8f32 xvfmaxa_s(v8f32 _1, v8f32 _2) { return __lasx_xvfmaxa_s(_1, _2); } + // CHECK-LABEL: @xvfmaxa_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfmaxa.d(<4 x double> _1:%.*, <4 x double> _2:%.*) +-// CHECK-NEXT: ret <4 x double> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x double>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfmaxa.d(<4 x 
double> _1, <4 x double> _2) ++// CHECK-NEXT: store <4 x double> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4f64 xvfmaxa_d(v4f64 _1, v4f64 _2) { return __lasx_xvfmaxa_d(_1, _2); } + // CHECK-LABEL: @xvfclass_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfclass.s(<8 x float> _1:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfclass.s(<8 x float> _1) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvfclass_s(v8f32 _1) { return __lasx_xvfclass_s(_1); } + // CHECK-LABEL: @xvfclass_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfclass.d(<4 x double> _1:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfclass.d(<4 x double> _1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvfclass_d(v4f64 _1) { return __lasx_xvfclass_d(_1); } + // CHECK-LABEL: @xvfsqrt_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfsqrt.s(<8 x float> _1:%.*) +-// CHECK-NEXT: ret <8 x float> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfsqrt.s(<8 x float> _1) ++// CHECK-NEXT: store <8 x float> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8f32 xvfsqrt_s(v8f32 _1) { return __lasx_xvfsqrt_s(_1); } + // CHECK-LABEL: @xvfsqrt_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfsqrt.d(<4 x double> _1:%.*) +-// CHECK-NEXT: ret <4 x double> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfsqrt.d(<4 x double> _1) ++// CHECK-NEXT: store <4 x double> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4f64 xvfsqrt_d(v4f64 _1) { return __lasx_xvfsqrt_d(_1); } + // CHECK-LABEL: @xvfrecip_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfrecip.s(<8 x float> _1:%.*) +-// CHECK-NEXT: ret <8 x float> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfrecip.s(<8 x float> _1) ++// CHECK-NEXT: store <8 x float> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8f32 xvfrecip_s(v8f32 _1) { return __lasx_xvfrecip_s(_1); } + // CHECK-LABEL: @xvfrecip_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfrecip.d(<4 x double> _1:%.*) +-// CHECK-NEXT: ret <4 x double> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfrecip.d(<4 x double> _1) ++// CHECK-NEXT: store <4 x double> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4f64 xvfrecip_d(v4f64 _1) { return __lasx_xvfrecip_d(_1); } + // CHECK-LABEL: @xvfrint_s( + // 
CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfrint.s(<8 x float> _1:%.*) +-// CHECK-NEXT: ret <8 x float> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfrint.s(<8 x float> _1) ++// CHECK-NEXT: store <8 x float> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8f32 xvfrint_s(v8f32 _1) { return __lasx_xvfrint_s(_1); } + // CHECK-LABEL: @xvfrint_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfrint.d(<4 x double> _1:%.*) +-// CHECK-NEXT: ret <4 x double> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfrint.d(<4 x double> _1) ++// CHECK-NEXT: store <4 x double> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4f64 xvfrint_d(v4f64 _1) { return __lasx_xvfrint_d(_1); } + // CHECK-LABEL: @xvfrsqrt_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfrsqrt.s(<8 x float> _1:%.*) +-// CHECK-NEXT: ret <8 x float> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfrsqrt.s(<8 x float> _1) ++// CHECK-NEXT: store <8 x float> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8f32 xvfrsqrt_s(v8f32 _1) { return __lasx_xvfrsqrt_s(_1); } + // CHECK-LABEL: @xvfrsqrt_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfrsqrt.d(<4 x double> _1:%.*) +-// CHECK-NEXT: ret <4 x double> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfrsqrt.d(<4 x double> _1) ++// CHECK-NEXT: store <4 x double> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4f64 xvfrsqrt_d(v4f64 _1) { return __lasx_xvfrsqrt_d(_1); } + // CHECK-LABEL: @xvflogb_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvflogb.s(<8 x float> _1:%.*) +-// CHECK-NEXT: ret <8 x float> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvflogb.s(<8 x float> _1) ++// CHECK-NEXT: store <8 x float> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8f32 xvflogb_s(v8f32 _1) { return __lasx_xvflogb_s(_1); } + // CHECK-LABEL: @xvflogb_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvflogb.d(<4 x double> _1:%.*) +-// CHECK-NEXT: ret <4 x double> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvflogb.d(<4 x double> _1) ++// CHECK-NEXT: store <4 x double> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4f64 xvflogb_d(v4f64 _1) { return __lasx_xvflogb_d(_1); } + // CHECK-LABEL: @xvfcvth_s_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfcvth.s.h(<16 x i16> _1:%.*) +-// CHECK-NEXT: ret <8 x float> TMP0 ++// CHECK-NEXT: _112:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// 
CHECK-NEXT: TMP1:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfcvth.s.h(<16 x i16> _112) ++// CHECK-NEXT: store <8 x float> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8f32 xvfcvth_s_h(v16i16 _1) { return __lasx_xvfcvth_s_h(_1); } + // CHECK-LABEL: @xvfcvth_d_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfcvth.d.s(<8 x float> _1:%.*) +-// CHECK-NEXT: ret <4 x double> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfcvth.d.s(<8 x float> _1) ++// CHECK-NEXT: store <4 x double> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4f64 xvfcvth_d_s(v8f32 _1) { return __lasx_xvfcvth_d_s(_1); } + // CHECK-LABEL: @xvfcvtl_s_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfcvtl.s.h(<16 x i16> _1:%.*) +-// CHECK-NEXT: ret <8 x float> TMP0 ++// CHECK-NEXT: _112:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfcvtl.s.h(<16 x i16> _112) ++// CHECK-NEXT: store <8 x float> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8f32 xvfcvtl_s_h(v16i16 _1) { return __lasx_xvfcvtl_s_h(_1); } + // CHECK-LABEL: @xvfcvtl_d_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfcvtl.d.s(<8 x float> _1:%.*) +-// CHECK-NEXT: ret <4 x double> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfcvtl.d.s(<8 x float> _1) ++// CHECK-NEXT: store <4 x double> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4f64 xvfcvtl_d_s(v8f32 _1) { return __lasx_xvfcvtl_d_s(_1); } + // CHECK-LABEL: @xvftint_w_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.w.s(<8 x float> _1:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.w.s(<8 x float> _1) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvftint_w_s(v8f32 _1) { return __lasx_xvftint_w_s(_1); } + // CHECK-LABEL: @xvftint_l_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftint.l.d(<4 x double> _1:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftint.l.d(<4 x double> _1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvftint_l_d(v4f64 _1) { return __lasx_xvftint_l_d(_1); } + // CHECK-LABEL: @xvftint_wu_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.wu.s(<8 x float> _1:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.wu.s(<8 x float> _1) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8u32 
xvftint_wu_s(v8f32 _1) { return __lasx_xvftint_wu_s(_1); } + // CHECK-LABEL: @xvftint_lu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftint.lu.d(<4 x double> _1:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftint.lu.d(<4 x double> _1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4u64 xvftint_lu_d(v4f64 _1) { return __lasx_xvftint_lu_d(_1); } + // CHECK-LABEL: @xvftintrz_w_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.s(<8 x float> _1:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.s(<8 x float> _1) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvftintrz_w_s(v8f32 _1) { return __lasx_xvftintrz_w_s(_1); } + // CHECK-LABEL: @xvftintrz_l_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrz.l.d(<4 x double> _1:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrz.l.d(<4 x double> _1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvftintrz_l_d(v4f64 _1) { return __lasx_xvftintrz_l_d(_1); } + // CHECK-LABEL: @xvftintrz_wu_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.wu.s(<8 x float> _1:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.wu.s(<8 x float> _1) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8u32 xvftintrz_wu_s(v8f32 _1) { return __lasx_xvftintrz_wu_s(_1); } + // CHECK-LABEL: @xvftintrz_lu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrz.lu.d(<4 x double> _1:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrz.lu.d(<4 x double> _1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4u64 xvftintrz_lu_d(v4f64 _1) { return __lasx_xvftintrz_lu_d(_1); } + // CHECK-LABEL: @xvffint_s_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.w(<8 x i32> _1:%.*) +-// CHECK-NEXT: ret <8 x float> TMP0 ++// CHECK-NEXT: _112:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.w(<8 x i32> _112) ++// CHECK-NEXT: store <8 x float> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8f32 xvffint_s_w(v8i32 _1) { return __lasx_xvffint_s_w(_1); } + // CHECK-LABEL: @xvffint_d_l( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvffint.d.l(<4 
x i64> _1:%.*) +-// CHECK-NEXT: ret <4 x double> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvffint.d.l(<4 x i64> _1) ++// CHECK-NEXT: store <4 x double> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4f64 xvffint_d_l(v4i64 _1) { return __lasx_xvffint_d_l(_1); } + // CHECK-LABEL: @xvffint_s_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.wu(<8 x i32> _1:%.*) +-// CHECK-NEXT: ret <8 x float> TMP0 ++// CHECK-NEXT: _112:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.wu(<8 x i32> _112) ++// CHECK-NEXT: store <8 x float> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8f32 xvffint_s_wu(v8u32 _1) { return __lasx_xvffint_s_wu(_1); } + // CHECK-LABEL: @xvffint_d_lu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvffint.d.lu(<4 x i64> _1:%.*) +-// CHECK-NEXT: ret <4 x double> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvffint.d.lu(<4 x i64> _1) ++// CHECK-NEXT: store <4 x double> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4f64 xvffint_d_lu(v4u64 _1) { return __lasx_xvffint_d_lu(_1); } + // CHECK-LABEL: @xvreplve_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve.b(<32 x i8> _1:%.*, i32 _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _112:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve.b(<32 x i8> _112, i32 _2:%.*) ++// CHECK-NEXT: store <32 x i8> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvreplve_b(v32i8 _1, int _2) { return __lasx_xvreplve_b(_1, _2); } + // CHECK-LABEL: @xvreplve_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvreplve.h(<16 x i16> _1:%.*, i32 _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _112:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvreplve.h(<16 x i16> _112, i32 _2:%.*) ++// CHECK-NEXT: store <16 x i16> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvreplve_h(v16i16 _1, int _2) { return __lasx_xvreplve_h(_1, _2); } + // CHECK-LABEL: @xvreplve_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvreplve.w(<8 x i32> _1:%.*, i32 _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _112:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvreplve.w(<8 x i32> _112, i32 _2:%.*) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvreplve_w(v8i32 _1, int _2) { return __lasx_xvreplve_w(_1, _2); } + // CHECK-LABEL: @xvreplve_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvreplve.d(<4 x i64> _1:%.*, i32 _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// 
CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvreplve.d(<4 x i64> _1, i32 _2:%.*) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvreplve_d(v4i64 _1, int _2) { return __lasx_xvreplve_d(_1, _2); } + // CHECK-LABEL: @xvpermi_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvpermi.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvpermi.w(<8 x i32> _1, <8 x i32> _2, i32 1) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvpermi_w(v8i32 _1, v8i32 _2) { return __lasx_xvpermi_w(_1, _2, 1); } + // CHECK-LABEL: @xvandn_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvandn.v(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _124:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvandn.v(<32 x i8> _124, <32 x i8> _235) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32u8 xvandn_v(v32u8 _1, v32u8 _2) { return __lasx_xvandn_v(_1, _2); } + // CHECK-LABEL: @xvneg_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvneg.b(<32 x i8> _1:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _112:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvneg.b(<32 x i8> _112) ++// CHECK-NEXT: store <32 x i8> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvneg_b(v32i8 _1) { return __lasx_xvneg_b(_1); } + // CHECK-LABEL: @xvneg_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvneg.h(<16 x i16> _1:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _112:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvneg.h(<16 x i16> _112) ++// CHECK-NEXT: store <16 x i16> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvneg_h(v16i16 _1) { return __lasx_xvneg_h(_1); } + // CHECK-LABEL: @xvneg_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvneg.w(<8 x i32> _1:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _112:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvneg.w(<8 x i32> _112) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvneg_w(v8i32 _1) { return __lasx_xvneg_w(_1); } + // CHECK-LABEL: @xvneg_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvneg.d(<4 x i64> _1:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvneg.d(<4 x i64> _1) 
++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvneg_d(v4i64 _1) { return __lasx_xvneg_d(_1); } + // CHECK-LABEL: @xvmuh_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmuh.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _124:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmuh.b(<32 x i8> _124, <32 x i8> _235) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvmuh_b(v32i8 _1, v32i8 _2) { return __lasx_xvmuh_b(_1, _2); } + // CHECK-LABEL: @xvmuh_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmuh.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _124:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmuh.h(<16 x i16> _124, <16 x i16> _235) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvmuh_h(v16i16 _1, v16i16 _2) { return __lasx_xvmuh_h(_1, _2); } + // CHECK-LABEL: @xvmuh_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmuh.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _124:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmuh.w(<8 x i32> _124, <8 x i32> _235) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvmuh_w(v8i32 _1, v8i32 _2) { return __lasx_xvmuh_w(_1, _2); } + // CHECK-LABEL: @xvmuh_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmuh.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmuh.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvmuh_d(v4i64 _1, v4i64 _2) { return __lasx_xvmuh_d(_1, _2); } + // CHECK-LABEL: @xvmuh_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmuh.bu(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _124:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmuh.bu(<32 x i8> _124, <32 x i8> _235) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32u8 xvmuh_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmuh_bu(_1, _2); } + // CHECK-LABEL: @xvmuh_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> 
@llvm.loongarch.lasx.xvmuh.hu(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _124:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmuh.hu(<16 x i16> _124, <16 x i16> _235) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16u16 xvmuh_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmuh_hu(_1, _2); } + // CHECK-LABEL: @xvmuh_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmuh.wu(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _124:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmuh.wu(<8 x i32> _124, <8 x i32> _235) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8u32 xvmuh_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmuh_wu(_1, _2); } + // CHECK-LABEL: @xvmuh_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmuh.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmuh.du(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4u64 xvmuh_du(v4u64 _1, v4u64 _2) { return __lasx_xvmuh_du(_1, _2); } + // CHECK-LABEL: @xvsllwil_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b(<32 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b(<32 x i8> _1, i32 1) ++// CHECK-NEXT: store <16 x i16> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvsllwil_h_b(v32i8 _1) { return __lasx_xvsllwil_h_b(_1, 1); } + // CHECK-LABEL: @xvsllwil_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h(<16 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h(<16 x i16> _1, i32 1) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvsllwil_w_h(v16i16 _1) { return __lasx_xvsllwil_w_h(_1, 1); } + // CHECK-LABEL: @xvsllwil_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsllwil.d.w(<8 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsllwil.d.w(<8 x i32> _1, i32 1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvsllwil_d_w(v8i32 _1) { return 
__lasx_xvsllwil_d_w(_1, 1); } + // CHECK-LABEL: @xvsllwil_hu_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu(<32 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu(<32 x i8> _1, i32 1) ++// CHECK-NEXT: store <16 x i16> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16u16 xvsllwil_hu_bu(v32u8 _1) { return __lasx_xvsllwil_hu_bu(_1, 1); } + // CHECK-LABEL: @xvsllwil_wu_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu(<16 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu(<16 x i16> _1, i32 1) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8u32 xvsllwil_wu_hu(v16u16 _1) { return __lasx_xvsllwil_wu_hu(_1, 1); } + // CHECK-LABEL: @xvsllwil_du_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu(<8 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu(<8 x i32> _1, i32 1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4u64 xvsllwil_du_wu(v8u32 _1) { return __lasx_xvsllwil_du_wu(_1, 1); } + // CHECK-LABEL: @xvsran_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsran.b.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _124:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsran.b.h(<16 x i16> _124, <16 x i16> _235) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvsran_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvsran_b_h(_1, _2); } + // CHECK-LABEL: @xvsran_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsran.h.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _124:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsran.h.w(<8 x i32> _124, <8 x i32> _235) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvsran_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvsran_h_w(_1, _2); } + // CHECK-LABEL: @xvsran_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsran.w.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsran.w.d(<4 x 
i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvsran_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvsran_w_d(_1, _2); } + // CHECK-LABEL: @xvssran_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssran.b.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _124:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssran.b.h(<16 x i16> _124, <16 x i16> _235) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvssran_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvssran_b_h(_1, _2); } + // CHECK-LABEL: @xvssran_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssran.h.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _124:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssran.h.w(<8 x i32> _124, <8 x i32> _235) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvssran_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvssran_h_w(_1, _2); } + // CHECK-LABEL: @xvssran_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssran.w.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssran.w.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvssran_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvssran_w_d(_1, _2); } + // CHECK-LABEL: @xvssran_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssran.bu.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _124:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssran.bu.h(<16 x i16> _124, <16 x i16> _235) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32u8 xvssran_bu_h(v16u16 _1, v16u16 _2) { return __lasx_xvssran_bu_h(_1, _2); } + // CHECK-LABEL: @xvssran_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssran.hu.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _124:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssran.hu.w(<8 x i32> _124, <8 x i32> _235) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16u16 xvssran_hu_w(v8u32 _1, 
v8u32 _2) { return __lasx_xvssran_hu_w(_1, _2); } + // CHECK-LABEL: @xvssran_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssran.wu.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssran.wu.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8u32 xvssran_wu_d(v4u64 _1, v4u64 _2) { return __lasx_xvssran_wu_d(_1, _2); } + // CHECK-LABEL: @xvsrarn_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsrarn.b.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _124:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsrarn.b.h(<16 x i16> _124, <16 x i16> _235) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvsrarn_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrarn_b_h(_1, _2); } + // CHECK-LABEL: @xvsrarn_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsrarn.h.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _124:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsrarn.h.w(<8 x i32> _124, <8 x i32> _235) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvsrarn_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrarn_h_w(_1, _2); } + // CHECK-LABEL: @xvsrarn_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsrarn.w.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsrarn.w.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvsrarn_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrarn_w_d(_1, _2); } + // CHECK-LABEL: @xvssrarn_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarn.b.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _124:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarn.b.h(<16 x i16> _124, <16 x i16> _235) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvssrarn_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvssrarn_b_h(_1, _2); } + // CHECK-LABEL: @xvssrarn_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> 
@llvm.loongarch.lasx.xvssrarn.h.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _124:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarn.h.w(<8 x i32> _124, <8 x i32> _235) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvssrarn_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvssrarn_h_w(_1, _2); } + // CHECK-LABEL: @xvssrarn_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarn.w.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarn.w.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvssrarn_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvssrarn_w_d(_1, _2); } + // CHECK-LABEL: @xvssrarn_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarn.bu.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _124:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarn.bu.h(<16 x i16> _124, <16 x i16> _235) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32u8 xvssrarn_bu_h(v16u16 _1, v16u16 _2) { return __lasx_xvssrarn_bu_h(_1, _2); } + // CHECK-LABEL: @xvssrarn_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarn.hu.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _124:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarn.hu.w(<8 x i32> _124, <8 x i32> _235) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16u16 xvssrarn_hu_w(v8u32 _1, v8u32 _2) { return __lasx_xvssrarn_hu_w(_1, _2); } + // CHECK-LABEL: @xvssrarn_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarn.wu.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarn.wu.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8u32 xvssrarn_wu_d(v4u64 _1, v4u64 _2) { return __lasx_xvssrarn_wu_d(_1, _2); } + // CHECK-LABEL: @xvsrln_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsrln.b.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _124:%.* = load <16 x i16>, 
ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsrln.b.h(<16 x i16> _124, <16 x i16> _235) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvsrln_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrln_b_h(_1, _2); } + // CHECK-LABEL: @xvsrln_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsrln.h.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _124:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsrln.h.w(<8 x i32> _124, <8 x i32> _235) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvsrln_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrln_h_w(_1, _2); } + // CHECK-LABEL: @xvsrln_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsrln.w.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsrln.w.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvsrln_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrln_w_d(_1, _2); } + // CHECK-LABEL: @xvssrln_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssrln.bu.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _124:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssrln.bu.h(<16 x i16> _124, <16 x i16> _235) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32u8 xvssrln_bu_h(v16u16 _1, v16u16 _2) { return __lasx_xvssrln_bu_h(_1, _2); } + // CHECK-LABEL: @xvssrln_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssrln.hu.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _124:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssrln.hu.w(<8 x i32> _124, <8 x i32> _235) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16u16 xvssrln_hu_w(v8u32 _1, v8u32 _2) { return __lasx_xvssrln_hu_w(_1, _2); } + // CHECK-LABEL: @xvssrln_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssrln.wu.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> 
@llvm.loongarch.lasx.xvssrln.wu.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8u32 xvssrln_wu_d(v4u64 _1, v4u64 _2) { return __lasx_xvssrln_wu_d(_1, _2); } + // CHECK-LABEL: @xvsrlrn_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlrn.b.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _124:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlrn.b.h(<16 x i16> _124, <16 x i16> _235) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvsrlrn_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvsrlrn_b_h(_1, _2); } + // CHECK-LABEL: @xvsrlrn_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlrn.h.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _124:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlrn.h.w(<8 x i32> _124, <8 x i32> _235) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvsrlrn_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvsrlrn_h_w(_1, _2); } + // CHECK-LABEL: @xvsrlrn_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlrn.w.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlrn.w.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvsrlrn_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvsrlrn_w_d(_1, _2); } + // CHECK-LABEL: @xvssrlrn_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.bu.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _124:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.bu.h(<16 x i16> _124, <16 x i16> _235) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32u8 xvssrlrn_bu_h(v16u16 _1, v16u16 _2) { return __lasx_xvssrlrn_bu_h(_1, _2); } + // CHECK-LABEL: @xvssrlrn_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.hu.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _124:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.hu.w(<8 x i32> _124, <8 x i32> _235) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// 
CHECK-NEXT: ret void + // + v16u16 xvssrlrn_hu_w(v8u32 _1, v8u32 _2) { return __lasx_xvssrlrn_hu_w(_1, _2); } + // CHECK-LABEL: @xvssrlrn_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.wu.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.wu.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8u32 xvssrlrn_wu_d(v4u64 _1, v4u64 _2) { return __lasx_xvssrlrn_wu_d(_1, _2); } + // CHECK-LABEL: @xvfrstpi_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b(<32 x i8> _1, <32 x i8> _2, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvfrstpi_b(v32i8 _1, v32i8 _2) { return __lasx_xvfrstpi_b(_1, _2, 1); } + // CHECK-LABEL: @xvfrstpi_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvfrstpi.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvfrstpi.h(<16 x i16> _1, <16 x i16> _2, i32 1) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvfrstpi_h(v16i16 _1, v16i16 _2) { return __lasx_xvfrstpi_h(_1, _2, 1); } + // CHECK-LABEL: @xvfrstp_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvfrstp.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*, <32 x i8> _3:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _136:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _247:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _358:%.* = load <32 x i8>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvfrstp.b(<32 x i8> _136, <32 x i8> _247, <32 x i8> _358) ++// CHECK-NEXT: store <32 x i8> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvfrstp_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __lasx_xvfrstp_b(_1, _2, _3); } + // CHECK-LABEL: @xvfrstp_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvfrstp.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*, <16 x i16> _3:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _136:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _247:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _358:%.* = load <16 x i16>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvfrstp.h(<16 x i16> _136, <16 x i16> _247, <16 x i16> _358) ++// 
CHECK-NEXT: store <16 x i16> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvfrstp_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __lasx_xvfrstp_h(_1, _2, _3); } + // CHECK-LABEL: @xvshuf4i_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d(<4 x i64> _1, <4 x i64> _2, i32 1) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvshuf4i_d(v4i64 _1, v4i64 _2) { return __lasx_xvshuf4i_d(_1, _2, 1); } + // CHECK-LABEL: @xvbsrl_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvbsrl.v(<32 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvbsrl.v(<32 x i8> _1, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvbsrl_v(v32i8 _1) { return __lasx_xvbsrl_v(_1, 1); } + // CHECK-LABEL: @xvbsll_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvbsll.v(<32 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvbsll.v(<32 x i8> _1, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvbsll_v(v32i8 _1) { return __lasx_xvbsll_v(_1, 1); } + // CHECK-LABEL: @xvextrins_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvextrins.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvextrins.b(<32 x i8> _1, <32 x i8> _2, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvextrins_b(v32i8 _1, v32i8 _2) { return __lasx_xvextrins_b(_1, _2, 1); } + // CHECK-LABEL: @xvextrins_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvextrins.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvextrins.h(<16 x i16> _1, <16 x i16> _2, i32 1) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvextrins_h(v16i16 _1, v16i16 _2) { return __lasx_xvextrins_h(_1, _2, 1); } + // CHECK-LABEL: @xvextrins_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvextrins.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i32> 
TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvextrins.w(<8 x i32> _1, <8 x i32> _2, i32 1) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvextrins_w(v8i32 _1, v8i32 _2) { return __lasx_xvextrins_w(_1, _2, 1); } + // CHECK-LABEL: @xvextrins_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvextrins.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvextrins.d(<4 x i64> _1, <4 x i64> _2, i32 1) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvextrins_d(v4i64 _1, v4i64 _2) { return __lasx_xvextrins_d(_1, _2, 1); } + // CHECK-LABEL: @xvmskltz_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmskltz.b(<32 x i8> _1:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _112:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmskltz.b(<32 x i8> _112) ++// CHECK-NEXT: store <32 x i8> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvmskltz_b(v32i8 _1) { return __lasx_xvmskltz_b(_1); } + // CHECK-LABEL: @xvmskltz_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmskltz.h(<16 x i16> _1:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _112:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmskltz.h(<16 x i16> _112) ++// CHECK-NEXT: store <16 x i16> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvmskltz_h(v16i16 _1) { return __lasx_xvmskltz_h(_1); } + // CHECK-LABEL: @xvmskltz_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmskltz.w(<8 x i32> _1:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _112:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmskltz.w(<8 x i32> _112) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvmskltz_w(v8i32 _1) { return __lasx_xvmskltz_w(_1); } + // CHECK-LABEL: @xvmskltz_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmskltz.d(<4 x i64> _1:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmskltz.d(<4 x i64> _1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvmskltz_d(v4i64 _1) { return __lasx_xvmskltz_d(_1); } + // CHECK-LABEL: @xvsigncov_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsigncov.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _124:%.* = 
load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsigncov.b(<32 x i8> _124, <32 x i8> _235) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvsigncov_b(v32i8 _1, v32i8 _2) { return __lasx_xvsigncov_b(_1, _2); } + // CHECK-LABEL: @xvsigncov_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsigncov.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _124:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsigncov.h(<16 x i16> _124, <16 x i16> _235) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvsigncov_h(v16i16 _1, v16i16 _2) { return __lasx_xvsigncov_h(_1, _2); } + // CHECK-LABEL: @xvsigncov_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsigncov.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _124:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsigncov.w(<8 x i32> _124, <8 x i32> _235) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvsigncov_w(v8i32 _1, v8i32 _2) { return __lasx_xvsigncov_w(_1, _2); } + // CHECK-LABEL: @xvsigncov_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsigncov.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsigncov.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvsigncov_d(v4i64 _1, v4i64 _2) { return __lasx_xvsigncov_d(_1, _2); } + // CHECK-LABEL: @xvfmadd_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfmadd.s(<8 x float> _1:%.*, <8 x float> _2:%.*, <8 x float> _3:%.*) +-// CHECK-NEXT: ret <8 x float> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x float>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _3:%.* = load <8 x float>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfmadd.s(<8 x float> _1, <8 x float> _2, <8 x float> _3) ++// CHECK-NEXT: store <8 x float> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8f32 xvfmadd_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __lasx_xvfmadd_s(_1, _2, _3); } + // CHECK-LABEL: @xvfmadd_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfmadd.d(<4 x double> _1:%.*, <4 x double> _2:%.*, <4 x double> _3:%.*) +-// CHECK-NEXT: ret <4 x double> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr 
TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x double>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _3:%.* = load <4 x double>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfmadd.d(<4 x double> _1, <4 x double> _2, <4 x double> _3) ++// CHECK-NEXT: store <4 x double> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4f64 xvfmadd_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __lasx_xvfmadd_d(_1, _2, _3); } + // CHECK-LABEL: @xvfmsub_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfmsub.s(<8 x float> _1:%.*, <8 x float> _2:%.*, <8 x float> _3:%.*) +-// CHECK-NEXT: ret <8 x float> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x float>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _3:%.* = load <8 x float>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfmsub.s(<8 x float> _1, <8 x float> _2, <8 x float> _3) ++// CHECK-NEXT: store <8 x float> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8f32 xvfmsub_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __lasx_xvfmsub_s(_1, _2, _3); } + // CHECK-LABEL: @xvfmsub_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfmsub.d(<4 x double> _1:%.*, <4 x double> _2:%.*, <4 x double> _3:%.*) +-// CHECK-NEXT: ret <4 x double> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x double>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _3:%.* = load <4 x double>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfmsub.d(<4 x double> _1, <4 x double> _2, <4 x double> _3) ++// CHECK-NEXT: store <4 x double> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4f64 xvfmsub_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __lasx_xvfmsub_d(_1, _2, _3); } + // CHECK-LABEL: @xvfnmadd_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfnmadd.s(<8 x float> _1:%.*, <8 x float> _2:%.*, <8 x float> _3:%.*) +-// CHECK-NEXT: ret <8 x float> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x float>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _3:%.* = load <8 x float>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfnmadd.s(<8 x float> _1, <8 x float> _2, <8 x float> _3) ++// CHECK-NEXT: store <8 x float> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8f32 xvfnmadd_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __lasx_xvfnmadd_s(_1, _2, _3); } + // CHECK-LABEL: @xvfnmadd_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfnmadd.d(<4 x double> _1:%.*, <4 x double> _2:%.*, <4 x double> _3:%.*) +-// CHECK-NEXT: ret <4 x double> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x double>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _3:%.* = load <4 x double>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfnmadd.d(<4 x double> _1, <4 x 
double> _2, <4 x double> _3) ++// CHECK-NEXT: store <4 x double> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4f64 xvfnmadd_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __lasx_xvfnmadd_d(_1, _2, _3); } + // CHECK-LABEL: @xvfnmsub_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfnmsub.s(<8 x float> _1:%.*, <8 x float> _2:%.*, <8 x float> _3:%.*) +-// CHECK-NEXT: ret <8 x float> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x float>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _3:%.* = load <8 x float>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfnmsub.s(<8 x float> _1, <8 x float> _2, <8 x float> _3) ++// CHECK-NEXT: store <8 x float> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8f32 xvfnmsub_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __lasx_xvfnmsub_s(_1, _2, _3); } + // CHECK-LABEL: @xvfnmsub_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfnmsub.d(<4 x double> _1:%.*, <4 x double> _2:%.*, <4 x double> _3:%.*) +-// CHECK-NEXT: ret <4 x double> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x double>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _3:%.* = load <4 x double>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfnmsub.d(<4 x double> _1, <4 x double> _2, <4 x double> _3) ++// CHECK-NEXT: store <4 x double> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4f64 xvfnmsub_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __lasx_xvfnmsub_d(_1, _2, _3); } + // CHECK-LABEL: @xvftintrne_w_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.s(<8 x float> _1:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.s(<8 x float> _1) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvftintrne_w_s(v8f32 _1) { return __lasx_xvftintrne_w_s(_1); } + // CHECK-LABEL: @xvftintrne_l_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrne.l.d(<4 x double> _1:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrne.l.d(<4 x double> _1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvftintrne_l_d(v4f64 _1) { return __lasx_xvftintrne_l_d(_1); } + // CHECK-LABEL: @xvftintrp_w_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.s(<8 x float> _1:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.s(<8 x float> _1) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvftintrp_w_s(v8f32 _1) { return __lasx_xvftintrp_w_s(_1); } + 
// CHECK-LABEL: @xvftintrp_l_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrp.l.d(<4 x double> _1:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrp.l.d(<4 x double> _1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvftintrp_l_d(v4f64 _1) { return __lasx_xvftintrp_l_d(_1); } + // CHECK-LABEL: @xvftintrm_w_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.s(<8 x float> _1:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.s(<8 x float> _1) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvftintrm_w_s(v8f32 _1) { return __lasx_xvftintrm_w_s(_1); } + // CHECK-LABEL: @xvftintrm_l_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrm.l.d(<4 x double> _1:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrm.l.d(<4 x double> _1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvftintrm_l_d(v4f64 _1) { return __lasx_xvftintrm_l_d(_1); } + // CHECK-LABEL: @xvftint_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.w.d(<4 x double> _1:%.*, <4 x double> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x double>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.w.d(<4 x double> _1, <4 x double> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvftint_w_d(v4f64 _1, v4f64 _2) { return __lasx_xvftint_w_d(_1, _2); } + // CHECK-LABEL: @xvffint_s_l( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.l(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <8 x float> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.l(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <8 x float> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8f32 xvffint_s_l(v4i64 _1, v4i64 _2) { return __lasx_xvffint_s_l(_1, _2); } + // CHECK-LABEL: @xvftintrz_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.d(<4 x double> _1:%.*, <4 x double> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x double>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.d(<4 x double> _1, <4 x double> _2) ++// CHECK-NEXT: store <8 
x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvftintrz_w_d(v4f64 _1, v4f64 _2) { return __lasx_xvftintrz_w_d(_1, _2); } + // CHECK-LABEL: @xvftintrp_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.d(<4 x double> _1:%.*, <4 x double> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x double>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.d(<4 x double> _1, <4 x double> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvftintrp_w_d(v4f64 _1, v4f64 _2) { return __lasx_xvftintrp_w_d(_1, _2); } + // CHECK-LABEL: @xvftintrm_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.d(<4 x double> _1:%.*, <4 x double> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x double>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.d(<4 x double> _1, <4 x double> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvftintrm_w_d(v4f64 _1, v4f64 _2) { return __lasx_xvftintrm_w_d(_1, _2); } + // CHECK-LABEL: @xvftintrne_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.d(<4 x double> _1:%.*, <4 x double> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x double>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.d(<4 x double> _1, <4 x double> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvftintrne_w_d(v4f64 _1, v4f64 _2) { return __lasx_xvftintrne_w_d(_1, _2); } + // CHECK-LABEL: @xvftinth_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftinth.l.s(<8 x float> _1:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftinth.l.s(<8 x float> _1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvftinth_l_s(v8f32 _1) { return __lasx_xvftinth_l_s(_1); } + // CHECK-LABEL: @xvftintl_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftintl.l.s(<8 x float> _1:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftintl.l.s(<8 x float> _1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvftintl_l_s(v8f32 _1) { return __lasx_xvftintl_l_s(_1); } + // CHECK-LABEL: @xvffinth_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvffinth.d.w(<8 x i32> _1:%.*) +-// CHECK-NEXT: ret <4 x double> 
TMP0 ++// CHECK-NEXT: _112:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvffinth.d.w(<8 x i32> _112) ++// CHECK-NEXT: store <4 x double> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4f64 xvffinth_d_w(v8i32 _1) { return __lasx_xvffinth_d_w(_1); } + // CHECK-LABEL: @xvffintl_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvffintl.d.w(<8 x i32> _1:%.*) +-// CHECK-NEXT: ret <4 x double> TMP0 ++// CHECK-NEXT: _112:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvffintl.d.w(<8 x i32> _112) ++// CHECK-NEXT: store <4 x double> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4f64 xvffintl_d_w(v8i32 _1) { return __lasx_xvffintl_d_w(_1); } + // CHECK-LABEL: @xvftintrzh_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrzh.l.s(<8 x float> _1:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrzh.l.s(<8 x float> _1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvftintrzh_l_s(v8f32 _1) { return __lasx_xvftintrzh_l_s(_1); } + // CHECK-LABEL: @xvftintrzl_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrzl.l.s(<8 x float> _1:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrzl.l.s(<8 x float> _1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvftintrzl_l_s(v8f32 _1) { return __lasx_xvftintrzl_l_s(_1); } + // CHECK-LABEL: @xvftintrph_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrph.l.s(<8 x float> _1:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrph.l.s(<8 x float> _1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvftintrph_l_s(v8f32 _1) { return __lasx_xvftintrph_l_s(_1); } + // CHECK-LABEL: @xvftintrpl_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrpl.l.s(<8 x float> _1:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrpl.l.s(<8 x float> _1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvftintrpl_l_s(v8f32 _1) { return __lasx_xvftintrpl_l_s(_1); } + // CHECK-LABEL: @xvftintrmh_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrmh.l.s(<8 x float> _1:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrmh.l.s(<8 x float> 
_1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvftintrmh_l_s(v8f32 _1) { return __lasx_xvftintrmh_l_s(_1); } + // CHECK-LABEL: @xvftintrml_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrml.l.s(<8 x float> _1:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrml.l.s(<8 x float> _1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvftintrml_l_s(v8f32 _1) { return __lasx_xvftintrml_l_s(_1); } + // CHECK-LABEL: @xvftintrneh_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrneh.l.s(<8 x float> _1:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrneh.l.s(<8 x float> _1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvftintrneh_l_s(v8f32 _1) { return __lasx_xvftintrneh_l_s(_1); } + // CHECK-LABEL: @xvftintrnel_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrnel.l.s(<8 x float> _1:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrnel.l.s(<8 x float> _1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvftintrnel_l_s(v8f32 _1) { return __lasx_xvftintrnel_l_s(_1); } + // CHECK-LABEL: @xvfrintrne_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrne.s(<8 x float> _1:%.*) +-// CHECK-NEXT: TMP1:%.* = bitcast <8 x float> TMP0 to <8 x i32> +-// CHECK-NEXT: ret <8 x i32> TMP1 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrne.s(<8 x float> _1) ++// CHECK-NEXT: store <8 x float> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvfrintrne_s(v8f32 _1) { return __lasx_xvfrintrne_s(_1); } + // CHECK-LABEL: @xvfrintrne_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrne.d(<4 x double> _1:%.*) +-// CHECK-NEXT: TMP1:%.* = bitcast <4 x double> TMP0 to <4 x i64> +-// CHECK-NEXT: ret <4 x i64> TMP1 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrne.d(<4 x double> _1) ++// CHECK-NEXT: store <4 x double> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvfrintrne_d(v4f64 _1) { return __lasx_xvfrintrne_d(_1); } + // CHECK-LABEL: @xvfrintrz_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrz.s(<8 x float> _1:%.*) +-// CHECK-NEXT: TMP1:%.* = bitcast <8 x float> TMP0 to <8 x i32> +-// CHECK-NEXT: ret <8 x i32> TMP1 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x float> 
@llvm.loongarch.lasx.xvfrintrz.s(<8 x float> _1) ++// CHECK-NEXT: store <8 x float> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvfrintrz_s(v8f32 _1) { return __lasx_xvfrintrz_s(_1); } + // CHECK-LABEL: @xvfrintrz_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrz.d(<4 x double> _1:%.*) +-// CHECK-NEXT: TMP1:%.* = bitcast <4 x double> TMP0 to <4 x i64> +-// CHECK-NEXT: ret <4 x i64> TMP1 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrz.d(<4 x double> _1) ++// CHECK-NEXT: store <4 x double> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvfrintrz_d(v4f64 _1) { return __lasx_xvfrintrz_d(_1); } + // CHECK-LABEL: @xvfrintrp_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrp.s(<8 x float> _1:%.*) +-// CHECK-NEXT: TMP1:%.* = bitcast <8 x float> TMP0 to <8 x i32> +-// CHECK-NEXT: ret <8 x i32> TMP1 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrp.s(<8 x float> _1) ++// CHECK-NEXT: store <8 x float> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvfrintrp_s(v8f32 _1) { return __lasx_xvfrintrp_s(_1); } + // CHECK-LABEL: @xvfrintrp_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrp.d(<4 x double> _1:%.*) +-// CHECK-NEXT: TMP1:%.* = bitcast <4 x double> TMP0 to <4 x i64> +-// CHECK-NEXT: ret <4 x i64> TMP1 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrp.d(<4 x double> _1) ++// CHECK-NEXT: store <4 x double> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvfrintrp_d(v4f64 _1) { return __lasx_xvfrintrp_d(_1); } + // CHECK-LABEL: @xvfrintrm_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrm.s(<8 x float> _1:%.*) +-// CHECK-NEXT: TMP1:%.* = bitcast <8 x float> TMP0 to <8 x i32> +-// CHECK-NEXT: ret <8 x i32> TMP1 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrm.s(<8 x float> _1) ++// CHECK-NEXT: store <8 x float> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvfrintrm_s(v8f32 _1) { return __lasx_xvfrintrm_s(_1); } + // CHECK-LABEL: @xvfrintrm_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrm.d(<4 x double> _1:%.*) +-// CHECK-NEXT: TMP1:%.* = bitcast <4 x double> TMP0 to <4 x i64> +-// CHECK-NEXT: ret <4 x i64> TMP1 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrm.d(<4 x double> _1) ++// CHECK-NEXT: store <4 x double> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvfrintrm_d(v4f64 _1) { return __lasx_xvfrintrm_d(_1); } + // CHECK-LABEL: @xvld( + // CHECK-NEXT: entry: + // CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvld(ptr _1:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: store <32 x i8> 
TMP0, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvld(void * _1) { return __lasx_xvld(_1, 1); } + // CHECK-LABEL: @xvst( + // CHECK-NEXT: entry: +-// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvst(<32 x i8> _1:%.*, ptr _2:%.*, i32 1) ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvst(<32 x i8> _1, ptr _2:%.*, i32 1) + // CHECK-NEXT: ret void + // + void xvst(v32i8 _1, void * _2) { return __lasx_xvst(_1, _2, 1); } + // CHECK-LABEL: @xvstelm_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.b(<32 x i8> _1:%.*, ptr _2:%.*, i32 1, i32 1) ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.b(<32 x i8> _1, ptr _2:%.*, i32 1, i32 1) + // CHECK-NEXT: ret void + // + void xvstelm_b(v32i8 _1, void * _2) { return __lasx_xvstelm_b(_1, _2, 1, 1); } + // CHECK-LABEL: @xvstelm_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.h(<16 x i16> _1:%.*, ptr _2:%.*, i32 2, i32 1) ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.h(<16 x i16> _1, ptr _2:%.*, i32 2, i32 1) + // CHECK-NEXT: ret void + // + void xvstelm_h(v16i16 _1, void * _2) { return __lasx_xvstelm_h(_1, _2, 2, 1); } + // CHECK-LABEL: @xvstelm_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.w(<8 x i32> _1:%.*, ptr _2:%.*, i32 4, i32 1) ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.w(<8 x i32> _1, ptr _2:%.*, i32 4, i32 1) + // CHECK-NEXT: ret void + // + void xvstelm_w(v8i32 _1, void * _2) { return __lasx_xvstelm_w(_1, _2, 4, 1); } + // CHECK-LABEL: @xvstelm_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.d(<4 x i64> _1:%.*, ptr _2:%.*, i32 8, i32 1) ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.d(<4 x i64> _1, ptr _2:%.*, i32 8, i32 1) + // CHECK-NEXT: ret void + // + void xvstelm_d(v4i64 _1, void * _2) { return __lasx_xvstelm_d(_1, _2, 8, 1); } + // CHECK-LABEL: @xvinsve0_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvinsve0.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvinsve0.w(<8 x i32> _1, <8 x i32> _2, i32 1) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvinsve0_w(v8i32 _1, v8i32 _2) { return __lasx_xvinsve0_w(_1, _2, 1); } + // CHECK-LABEL: @xvinsve0_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvinsve0.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvinsve0.d(<4 x i64> _1, <4 x i64> _2, i32 1) ++// CHECK-NEXT: store <4 
x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvinsve0_d(v4i64 _1, v4i64 _2) { return __lasx_xvinsve0_d(_1, _2, 1); } + // CHECK-LABEL: @xvpickve_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvpickve.w(<8 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvpickve.w(<8 x i32> _1, i32 1) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvpickve_w(v8i32 _1) { return __lasx_xvpickve_w(_1, 1); } + // CHECK-LABEL: @xvpickve_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvpickve.d(<4 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvpickve.d(<4 x i64> _1, i32 1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvpickve_d(v4i64 _1) { return __lasx_xvpickve_d(_1, 1); } + // CHECK-LABEL: @xvssrlrn_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.b.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _124:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.b.h(<16 x i16> _124, <16 x i16> _235) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvssrlrn_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvssrlrn_b_h(_1, _2); } + // CHECK-LABEL: @xvssrlrn_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.h.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _124:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.h.w(<8 x i32> _124, <8 x i32> _235) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvssrlrn_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvssrlrn_h_w(_1, _2); } + // CHECK-LABEL: @xvssrlrn_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.w.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.w.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvssrlrn_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvssrlrn_w_d(_1, _2); } + // CHECK-LABEL: @xvssrln_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssrln.b.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _124:%.* = load 
<16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssrln.b.h(<16 x i16> _124, <16 x i16> _235) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvssrln_b_h(v16i16 _1, v16i16 _2) { return __lasx_xvssrln_b_h(_1, _2); } + // CHECK-LABEL: @xvssrln_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssrln.h.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _124:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssrln.h.w(<8 x i32> _124, <8 x i32> _235) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvssrln_h_w(v8i32 _1, v8i32 _2) { return __lasx_xvssrln_h_w(_1, _2); } + // CHECK-LABEL: @xvssrln_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssrln.w.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssrln.w.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvssrln_w_d(v4i64 _1, v4i64 _2) { return __lasx_xvssrln_w_d(_1, _2); } + // CHECK-LABEL: @xvorn_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvorn.v(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _124:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvorn.v(<32 x i8> _124, <32 x i8> _235) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvorn_v(v32i8 _1, v32i8 _2) { return __lasx_xvorn_v(_1, _2); } + // CHECK-LABEL: @xvldi( + // CHECK-NEXT: entry: + // CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvldi(i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: store <4 x i64> TMP0, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvldi() { return __lasx_xvldi(1); } + // CHECK-LABEL: @xvldx( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvldx(ptr _1:%.*, i64 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvldx(ptr _1:%.*, i64 1), !noalias META5:!0-9+ ++// CHECK-NEXT: store <32 x i8> TMP0, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvldx(void * _1) { return __lasx_xvldx(_1, 1); } + // CHECK-LABEL: @xvstx( + // CHECK-NEXT: entry: +-// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstx(<32 x i8> _1:%.*, ptr _2:%.*, i64 1) ++// CHECK-NEXT: _112:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstx(<32 x i8> _112, ptr _2:%.*, i64 1) + // 
CHECK-NEXT: ret void + // + void xvstx(v32i8 _1, void * _2) { return __lasx_xvstx(_1, _2, 1); } + // CHECK-LABEL: @xvextl_qu_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvextl.qu.du(<4 x i64> _1:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvextl.qu.du(<4 x i64> _1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4u64 xvextl_qu_du(v4u64 _1) { return __lasx_xvextl_qu_du(_1); } + // CHECK-LABEL: @xvinsgr2vr_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w(<8 x i32> _1:%.*, i32 1, i32 1) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w(<8 x i32> _1, i32 1, i32 1) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvinsgr2vr_w(v8i32 _1) { return __lasx_xvinsgr2vr_w(_1, 1, 1); } + // CHECK-LABEL: @xvinsgr2vr_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64> _1:%.*, i64 1, i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64> _1, i64 1, i32 1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvinsgr2vr_d(v4i64 _1) { return __lasx_xvinsgr2vr_d(_1, 1, 1); } + // CHECK-LABEL: @xvreplve0_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve0.b(<32 x i8> _1:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _112:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve0.b(<32 x i8> _112) ++// CHECK-NEXT: store <32 x i8> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvreplve0_b(v32i8 _1) { return __lasx_xvreplve0_b(_1); } + // CHECK-LABEL: @xvreplve0_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvreplve0.h(<16 x i16> _1:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _112:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvreplve0.h(<16 x i16> _112) ++// CHECK-NEXT: store <16 x i16> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvreplve0_h(v16i16 _1) { return __lasx_xvreplve0_h(_1); } + // CHECK-LABEL: @xvreplve0_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvreplve0.w(<8 x i32> _1:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _112:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvreplve0.w(<8 x i32> _112) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvreplve0_w(v8i32 _1) { return __lasx_xvreplve0_w(_1); } + // CHECK-LABEL: @xvreplve0_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> 
@llvm.loongarch.lasx.xvreplve0.d(<4 x i64> _1:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvreplve0.d(<4 x i64> _1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvreplve0_d(v4i64 _1) { return __lasx_xvreplve0_d(_1); } + // CHECK-LABEL: @xvreplve0_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve0.q(<32 x i8> _1:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _112:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve0.q(<32 x i8> _112) ++// CHECK-NEXT: store <32 x i8> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvreplve0_q(v32i8 _1) { return __lasx_xvreplve0_q(_1); } + // CHECK-LABEL: @vext2xv_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.vext2xv.h.b(<32 x i8> _1:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _112:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i16> @llvm.loongarch.lasx.vext2xv.h.b(<32 x i8> _112) ++// CHECK-NEXT: store <16 x i16> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 vext2xv_h_b(v32i8 _1) { return __lasx_vext2xv_h_b(_1); } + // CHECK-LABEL: @vext2xv_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.h(<16 x i16> _1:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _112:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.h(<16 x i16> _112) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 vext2xv_w_h(v16i16 _1) { return __lasx_vext2xv_w_h(_1); } + // CHECK-LABEL: @vext2xv_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.w(<8 x i32> _1:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _112:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.w(<8 x i32> _112) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 vext2xv_d_w(v8i32 _1) { return __lasx_vext2xv_d_w(_1); } + // CHECK-LABEL: @vext2xv_w_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.b(<32 x i8> _1:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _112:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.b(<32 x i8> _112) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 vext2xv_w_b(v32i8 _1) { return __lasx_vext2xv_w_b(_1); } + // CHECK-LABEL: @vext2xv_d_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.h(<16 x i16> _1:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _112:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.h(<16 x i16> _112) ++// 
CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 vext2xv_d_h(v16i16 _1) { return __lasx_vext2xv_d_h(_1); } + // CHECK-LABEL: @vext2xv_d_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.b(<32 x i8> _1:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _112:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.b(<32 x i8> _112) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 vext2xv_d_b(v32i8 _1) { return __lasx_vext2xv_d_b(_1); } + // CHECK-LABEL: @vext2xv_hu_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.vext2xv.hu.bu(<32 x i8> _1:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _112:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i16> @llvm.loongarch.lasx.vext2xv.hu.bu(<32 x i8> _112) ++// CHECK-NEXT: store <16 x i16> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 vext2xv_hu_bu(v32i8 _1) { return __lasx_vext2xv_hu_bu(_1); } + // CHECK-LABEL: @vext2xv_wu_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.hu(<16 x i16> _1:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _112:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.hu(<16 x i16> _112) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 vext2xv_wu_hu(v16i16 _1) { return __lasx_vext2xv_wu_hu(_1); } + // CHECK-LABEL: @vext2xv_du_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.wu(<8 x i32> _1:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _112:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.wu(<8 x i32> _112) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 vext2xv_du_wu(v8i32 _1) { return __lasx_vext2xv_du_wu(_1); } + // CHECK-LABEL: @vext2xv_wu_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.bu(<32 x i8> _1:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _112:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.bu(<32 x i8> _112) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 vext2xv_wu_bu(v32i8 _1) { return __lasx_vext2xv_wu_bu(_1); } + // CHECK-LABEL: @vext2xv_du_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.hu(<16 x i16> _1:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _112:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.hu(<16 x i16> _112) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 vext2xv_du_hu(v16i16 _1) { return __lasx_vext2xv_du_hu(_1); } + // CHECK-LABEL: @vext2xv_du_bu( + // 
CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.bu(<32 x i8> _1:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _112:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.bu(<32 x i8> _112) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 vext2xv_du_bu(v32i8 _1) { return __lasx_vext2xv_du_bu(_1); } + // CHECK-LABEL: @xvpermi_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvpermi.q(<32 x i8> _1:%.*, <32 x i8> _2:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvpermi.q(<32 x i8> _1, <32 x i8> _2, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvpermi_q(v32i8 _1, v32i8 _2) { return __lasx_xvpermi_q(_1, _2, 1); } + // CHECK-LABEL: @xvpermi_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvpermi.d(<4 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvpermi.d(<4 x i64> _1, i32 1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvpermi_d(v4i64 _1) { return __lasx_xvpermi_d(_1, 1); } + // CHECK-LABEL: @xvperm_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvperm.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _124:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvperm.w(<8 x i32> _124, <8 x i32> _235) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvperm_w(v8i32 _1, v8i32 _2) { return __lasx_xvperm_w(_1, _2); } + // CHECK-LABEL: @xvldrepl_b( + // CHECK-NEXT: entry: + // CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(ptr _1:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: store <32 x i8> TMP0, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvldrepl_b(void * _1) { return __lasx_xvldrepl_b(_1, 1); } + // CHECK-LABEL: @xvldrepl_h( + // CHECK-NEXT: entry: + // CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvldrepl.h(ptr _1:%.*, i32 2) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: store <16 x i16> TMP0, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvldrepl_h(void * _1) { return __lasx_xvldrepl_h(_1, 2); } + // CHECK-LABEL: @xvldrepl_w( + // CHECK-NEXT: entry: + // CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvldrepl.w(ptr _1:%.*, i32 4) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: store <8 x i32> TMP0, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvldrepl_w(void * _1) { return __lasx_xvldrepl_w(_1, 4); } + // CHECK-LABEL: @xvldrepl_d( + // CHECK-NEXT: 
entry: + // CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvldrepl.d(ptr _1:%.*, i32 8) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: store <4 x i64> TMP0, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvldrepl_d(void * _1) { return __lasx_xvldrepl_d(_1, 8); } + // CHECK-LABEL: @xvpickve2gr_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lasx.xvpickve2gr.w(<8 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret i32 TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call i32 @llvm.loongarch.lasx.xvpickve2gr.w(<8 x i32> _1, i32 1) ++// CHECK-NEXT: ret i32 TMP1 + // + int xvpickve2gr_w(v8i32 _1) { return __lasx_xvpickve2gr_w(_1, 1); } + // CHECK-LABEL: @xvpickve2gr_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lasx.xvpickve2gr.wu(<8 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret i32 TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call i32 @llvm.loongarch.lasx.xvpickve2gr.wu(<8 x i32> _1, i32 1) ++// CHECK-NEXT: ret i32 TMP1 + // + unsigned int xvpickve2gr_wu(v8i32 _1) { return __lasx_xvpickve2gr_wu(_1, 1); } + // CHECK-LABEL: @xvpickve2gr_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret i64 TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64> _1, i32 1) ++// CHECK-NEXT: ret i64 TMP1 + // + long xvpickve2gr_d(v4i64 _1) { return __lasx_xvpickve2gr_d(_1, 1); } + // CHECK-LABEL: @xvpickve2gr_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret i64 TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64> _1, i32 1) ++// CHECK-NEXT: ret i64 TMP1 + // + unsigned long int xvpickve2gr_du(v4i64 _1) { return __lasx_xvpickve2gr_du(_1, 1); } + // CHECK-LABEL: @xvaddwev_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvaddwev_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvaddwev_q_d(_1, _2); } + // CHECK-LABEL: @xvaddwev_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _124:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.w(<8 x i32> _124, <8 x i32> _235) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvaddwev_d_w(v8i32 _1, v8i32 _2) { 
return __lasx_xvaddwev_d_w(_1, _2); } + // CHECK-LABEL: @xvaddwev_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _124:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.h(<16 x i16> _124, <16 x i16> _235) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvaddwev_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvaddwev_w_h(_1, _2); } + // CHECK-LABEL: @xvaddwev_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _124:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.b(<32 x i8> _124, <32 x i8> _235) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvaddwev_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvaddwev_h_b(_1, _2); } + // CHECK-LABEL: @xvaddwev_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvaddwev_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvaddwev_q_du(_1, _2); } + // CHECK-LABEL: @xvaddwev_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _124:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu(<8 x i32> _124, <8 x i32> _235) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvaddwev_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvaddwev_d_wu(_1, _2); } + // CHECK-LABEL: @xvaddwev_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _124:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu(<16 x i16> _124, <16 x i16> _235) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvaddwev_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvaddwev_w_hu(_1, _2); } + // CHECK-LABEL: @xvaddwev_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call 
<16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _124:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu(<32 x i8> _124, <32 x i8> _235) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvaddwev_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvaddwev_h_bu(_1, _2); } + // CHECK-LABEL: @xvsubwev_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvsubwev_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvsubwev_q_d(_1, _2); } + // CHECK-LABEL: @xvsubwev_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _124:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.w(<8 x i32> _124, <8 x i32> _235) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvsubwev_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvsubwev_d_w(_1, _2); } + // CHECK-LABEL: @xvsubwev_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _124:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.h(<16 x i16> _124, <16 x i16> _235) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvsubwev_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvsubwev_w_h(_1, _2); } + // CHECK-LABEL: @xvsubwev_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _124:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.b(<32 x i8> _124, <32 x i8> _235) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvsubwev_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvsubwev_h_b(_1, _2); } + // CHECK-LABEL: @xvsubwev_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load 
<4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.du(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvsubwev_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvsubwev_q_du(_1, _2); } + // CHECK-LABEL: @xvsubwev_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.wu(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _124:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.wu(<8 x i32> _124, <8 x i32> _235) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvsubwev_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvsubwev_d_wu(_1, _2); } + // CHECK-LABEL: @xvsubwev_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.hu(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _124:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.hu(<16 x i16> _124, <16 x i16> _235) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvsubwev_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvsubwev_w_hu(_1, _2); } + // CHECK-LABEL: @xvsubwev_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.bu(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _124:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.bu(<32 x i8> _124, <32 x i8> _235) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvsubwev_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvsubwev_h_bu(_1, _2); } + // CHECK-LABEL: @xvmulwev_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvmulwev_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvmulwev_q_d(_1, _2); } + // CHECK-LABEL: @xvmulwev_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _124:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: 
TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.w(<8 x i32> _124, <8 x i32> _235) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvmulwev_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvmulwev_d_w(_1, _2); } + // CHECK-LABEL: @xvmulwev_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _124:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.h(<16 x i16> _124, <16 x i16> _235) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvmulwev_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvmulwev_w_h(_1, _2); } + // CHECK-LABEL: @xvmulwev_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _124:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.b(<32 x i8> _124, <32 x i8> _235) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvmulwev_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvmulwev_h_b(_1, _2); } + // CHECK-LABEL: @xvmulwev_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvmulwev_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvmulwev_q_du(_1, _2); } + // CHECK-LABEL: @xvmulwev_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _124:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu(<8 x i32> _124, <8 x i32> _235) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvmulwev_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmulwev_d_wu(_1, _2); } + // CHECK-LABEL: @xvmulwev_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _124:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu(<16 x i16> _124, <16 x i16> _235) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr 
AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvmulwev_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmulwev_w_hu(_1, _2); } + // CHECK-LABEL: @xvmulwev_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _124:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu(<32 x i8> _124, <32 x i8> _235) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvmulwev_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmulwev_h_bu(_1, _2); } + // CHECK-LABEL: @xvaddwod_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvaddwod_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvaddwod_q_d(_1, _2); } + // CHECK-LABEL: @xvaddwod_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _124:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.w(<8 x i32> _124, <8 x i32> _235) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvaddwod_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvaddwod_d_w(_1, _2); } + // CHECK-LABEL: @xvaddwod_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _124:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.h(<16 x i16> _124, <16 x i16> _235) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvaddwod_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvaddwod_w_h(_1, _2); } + // CHECK-LABEL: @xvaddwod_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _124:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.b(<32 x i8> _124, <32 x i8> _235) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvaddwod_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvaddwod_h_b(_1, 
_2); } + // CHECK-LABEL: @xvaddwod_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvaddwod_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvaddwod_q_du(_1, _2); } + // CHECK-LABEL: @xvaddwod_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _124:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu(<8 x i32> _124, <8 x i32> _235) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvaddwod_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvaddwod_d_wu(_1, _2); } + // CHECK-LABEL: @xvaddwod_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _124:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu(<16 x i16> _124, <16 x i16> _235) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvaddwod_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvaddwod_w_hu(_1, _2); } + // CHECK-LABEL: @xvaddwod_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _124:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu(<32 x i8> _124, <32 x i8> _235) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvaddwod_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvaddwod_h_bu(_1, _2); } + // CHECK-LABEL: @xvsubwod_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvsubwod_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvsubwod_q_d(_1, _2); } + // CHECK-LABEL: @xvsubwod_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> 
@llvm.loongarch.lasx.xvsubwod.d.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _124:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.w(<8 x i32> _124, <8 x i32> _235) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvsubwod_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvsubwod_d_w(_1, _2); } + // CHECK-LABEL: @xvsubwod_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _124:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.h(<16 x i16> _124, <16 x i16> _235) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvsubwod_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvsubwod_w_h(_1, _2); } + // CHECK-LABEL: @xvsubwod_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _124:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.b(<32 x i8> _124, <32 x i8> _235) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvsubwod_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvsubwod_h_b(_1, _2); } + // CHECK-LABEL: @xvsubwod_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.du(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvsubwod_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvsubwod_q_du(_1, _2); } + // CHECK-LABEL: @xvsubwod_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.wu(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _124:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.wu(<8 x i32> _124, <8 x i32> _235) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvsubwod_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvsubwod_d_wu(_1, _2); } + // CHECK-LABEL: @xvsubwod_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.hu(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _124:%.* = load <16 x 
i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.hu(<16 x i16> _124, <16 x i16> _235) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvsubwod_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvsubwod_w_hu(_1, _2); } + // CHECK-LABEL: @xvsubwod_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.bu(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _124:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.bu(<32 x i8> _124, <32 x i8> _235) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvsubwod_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvsubwod_h_bu(_1, _2); } + // CHECK-LABEL: @xvmulwod_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvmulwod_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvmulwod_q_d(_1, _2); } + // CHECK-LABEL: @xvmulwod_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _124:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.w(<8 x i32> _124, <8 x i32> _235) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvmulwod_d_w(v8i32 _1, v8i32 _2) { return __lasx_xvmulwod_d_w(_1, _2); } + // CHECK-LABEL: @xvmulwod_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _124:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.h(<16 x i16> _124, <16 x i16> _235) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvmulwod_w_h(v16i16 _1, v16i16 _2) { return __lasx_xvmulwod_w_h(_1, _2); } + // CHECK-LABEL: @xvmulwod_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _124:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: 
TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.b(<32 x i8> _124, <32 x i8> _235) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvmulwod_h_b(v32i8 _1, v32i8 _2) { return __lasx_xvmulwod_h_b(_1, _2); } + // CHECK-LABEL: @xvmulwod_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvmulwod_q_du(v4u64 _1, v4u64 _2) { return __lasx_xvmulwod_q_du(_1, _2); } + // CHECK-LABEL: @xvmulwod_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _124:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu(<8 x i32> _124, <8 x i32> _235) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvmulwod_d_wu(v8u32 _1, v8u32 _2) { return __lasx_xvmulwod_d_wu(_1, _2); } + // CHECK-LABEL: @xvmulwod_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _124:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu(<16 x i16> _124, <16 x i16> _235) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvmulwod_w_hu(v16u16 _1, v16u16 _2) { return __lasx_xvmulwod_w_hu(_1, _2); } + // CHECK-LABEL: @xvmulwod_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _124:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu(<32 x i8> _124, <32 x i8> _235) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvmulwod_h_bu(v32u8 _1, v32u8 _2) { return __lasx_xvmulwod_h_bu(_1, _2); } + // CHECK-LABEL: @xvaddwev_d_wu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _124:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu.w(<8 x i32> _124, <8 x i32> _235) ++// CHECK-NEXT: store <4 x 
i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvaddwev_d_wu_w(v8u32 _1, v8i32 _2) { return __lasx_xvaddwev_d_wu_w(_1, _2); } + // CHECK-LABEL: @xvaddwev_w_hu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _124:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu.h(<16 x i16> _124, <16 x i16> _235) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvaddwev_w_hu_h(v16u16 _1, v16i16 _2) { return __lasx_xvaddwev_w_hu_h(_1, _2); } + // CHECK-LABEL: @xvaddwev_h_bu_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _124:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu.b(<32 x i8> _124, <32 x i8> _235) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvaddwev_h_bu_b(v32u8 _1, v32i8 _2) { return __lasx_xvaddwev_h_bu_b(_1, _2); } + // CHECK-LABEL: @xvmulwev_d_wu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _124:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu.w(<8 x i32> _124, <8 x i32> _235) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvmulwev_d_wu_w(v8u32 _1, v8i32 _2) { return __lasx_xvmulwev_d_wu_w(_1, _2); } + // CHECK-LABEL: @xvmulwev_w_hu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _124:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu.h(<16 x i16> _124, <16 x i16> _235) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvmulwev_w_hu_h(v16u16 _1, v16i16 _2) { return __lasx_xvmulwev_w_hu_h(_1, _2); } + // CHECK-LABEL: @xvmulwev_h_bu_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _124:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu.b(<32 x i8> _124, <32 x i8> _235) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// 
CHECK-NEXT: ret void + // + v16i16 xvmulwev_h_bu_b(v32u8 _1, v32i8 _2) { return __lasx_xvmulwev_h_bu_b(_1, _2); } + // CHECK-LABEL: @xvaddwod_d_wu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _124:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu.w(<8 x i32> _124, <8 x i32> _235) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvaddwod_d_wu_w(v8u32 _1, v8i32 _2) { return __lasx_xvaddwod_d_wu_w(_1, _2); } + // CHECK-LABEL: @xvaddwod_w_hu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _124:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu.h(<16 x i16> _124, <16 x i16> _235) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvaddwod_w_hu_h(v16u16 _1, v16i16 _2) { return __lasx_xvaddwod_w_hu_h(_1, _2); } + // CHECK-LABEL: @xvaddwod_h_bu_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _124:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu.b(<32 x i8> _124, <32 x i8> _235) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvaddwod_h_bu_b(v32u8 _1, v32i8 _2) { return __lasx_xvaddwod_h_bu_b(_1, _2); } + // CHECK-LABEL: @xvmulwod_d_wu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _124:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu.w(<8 x i32> _124, <8 x i32> _235) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvmulwod_d_wu_w(v8u32 _1, v8i32 _2) { return __lasx_xvmulwod_d_wu_w(_1, _2); } + // CHECK-LABEL: @xvmulwod_w_hu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _124:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu.h(<16 x i16> _124, <16 x i16> _235) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvmulwod_w_hu_h(v16u16 _1, v16i16 
_2) { return __lasx_xvmulwod_w_hu_h(_1, _2); } + // CHECK-LABEL: @xvmulwod_h_bu_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _124:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu.b(<32 x i8> _124, <32 x i8> _235) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvmulwod_h_bu_b(v32u8 _1, v32i8 _2) { return __lasx_xvmulwod_h_bu_b(_1, _2); } + // CHECK-LABEL: @xvhaddw_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.q.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.q.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvhaddw_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvhaddw_q_d(_1, _2); } + // CHECK-LABEL: @xvhaddw_qu_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.qu.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.qu.du(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4u64 xvhaddw_qu_du(v4u64 _1, v4u64 _2) { return __lasx_xvhaddw_qu_du(_1, _2); } + // CHECK-LABEL: @xvhsubw_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.q.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.q.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvhsubw_q_d(v4i64 _1, v4i64 _2) { return __lasx_xvhsubw_q_d(_1, _2); } + // CHECK-LABEL: @xvhsubw_qu_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.qu.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.qu.du(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4u64 xvhsubw_qu_du(v4u64 _1, v4u64 _2) { return __lasx_xvhsubw_qu_du(_1, _2); } + // CHECK-LABEL: @xvmaddwev_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> 
@llvm.loongarch.lasx.xvmaddwev.q.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*, <4 x i64> _3:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _3:%.* = load <4 x i64>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.d(<4 x i64> _1, <4 x i64> _2, <4 x i64> _3) ++// CHECK-NEXT: store <4 x i64> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvmaddwev_q_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __lasx_xvmaddwev_q_d(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwev_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.w(<4 x i64> _1:%.*, <8 x i32> _2:%.*, <8 x i32> _3:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _346:%.* = load <8 x i32>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.w(<4 x i64> _1, <8 x i32> _235, <8 x i32> _346) ++// CHECK-NEXT: store <4 x i64> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvmaddwev_d_w(v4i64 _1, v8i32 _2, v8i32 _3) { return __lasx_xvmaddwev_d_w(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwev_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.h(<8 x i32> _1:%.*, <16 x i16> _2:%.*, <16 x i16> _3:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _136:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _247:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _358:%.* = load <16 x i16>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.h(<8 x i32> _136, <16 x i16> _247, <16 x i16> _358) ++// CHECK-NEXT: store <8 x i32> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvmaddwev_w_h(v8i32 _1, v16i16 _2, v16i16 _3) { return __lasx_xvmaddwev_w_h(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwev_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.b(<16 x i16> _1:%.*, <32 x i8> _2:%.*, <32 x i8> _3:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _136:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _247:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _358:%.* = load <32 x i8>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.b(<16 x i16> _136, <32 x i8> _247, <32 x i8> _358) ++// CHECK-NEXT: store <16 x i16> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvmaddwev_h_b(v16i16 _1, v32i8 _2, v32i8 _3) { return __lasx_xvmaddwev_h_b(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwev_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*, <4 x i64> _3:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _3:%.* = 
load <4 x i64>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du(<4 x i64> _1, <4 x i64> _2, <4 x i64> _3) ++// CHECK-NEXT: store <4 x i64> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4u64 xvmaddwev_q_du(v4u64 _1, v4u64 _2, v4u64 _3) { return __lasx_xvmaddwev_q_du(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwev_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu(<4 x i64> _1:%.*, <8 x i32> _2:%.*, <8 x i32> _3:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _346:%.* = load <8 x i32>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu(<4 x i64> _1, <8 x i32> _235, <8 x i32> _346) ++// CHECK-NEXT: store <4 x i64> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4u64 xvmaddwev_d_wu(v4u64 _1, v8u32 _2, v8u32 _3) { return __lasx_xvmaddwev_d_wu(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwev_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu(<8 x i32> _1:%.*, <16 x i16> _2:%.*, <16 x i16> _3:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _136:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _247:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _358:%.* = load <16 x i16>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu(<8 x i32> _136, <16 x i16> _247, <16 x i16> _358) ++// CHECK-NEXT: store <8 x i32> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8u32 xvmaddwev_w_hu(v8u32 _1, v16u16 _2, v16u16 _3) { return __lasx_xvmaddwev_w_hu(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwev_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu(<16 x i16> _1:%.*, <32 x i8> _2:%.*, <32 x i8> _3:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _136:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _247:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _358:%.* = load <32 x i8>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu(<16 x i16> _136, <32 x i8> _247, <32 x i8> _358) ++// CHECK-NEXT: store <16 x i16> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16u16 xvmaddwev_h_bu(v16u16 _1, v32u8 _2, v32u8 _3) { return __lasx_xvmaddwev_h_bu(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwod_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*, <4 x i64> _3:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _3:%.* = load <4 x i64>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.d(<4 x i64> _1, <4 x i64> _2, <4 x i64> _3) ++// CHECK-NEXT: store <4 x i64> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + 
v4i64 xvmaddwod_q_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __lasx_xvmaddwod_q_d(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwod_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.w(<4 x i64> _1:%.*, <8 x i32> _2:%.*, <8 x i32> _3:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _346:%.* = load <8 x i32>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.w(<4 x i64> _1, <8 x i32> _235, <8 x i32> _346) ++// CHECK-NEXT: store <4 x i64> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvmaddwod_d_w(v4i64 _1, v8i32 _2, v8i32 _3) { return __lasx_xvmaddwod_d_w(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwod_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.h(<8 x i32> _1:%.*, <16 x i16> _2:%.*, <16 x i16> _3:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _136:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _247:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _358:%.* = load <16 x i16>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.h(<8 x i32> _136, <16 x i16> _247, <16 x i16> _358) ++// CHECK-NEXT: store <8 x i32> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvmaddwod_w_h(v8i32 _1, v16i16 _2, v16i16 _3) { return __lasx_xvmaddwod_w_h(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwod_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.b(<16 x i16> _1:%.*, <32 x i8> _2:%.*, <32 x i8> _3:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _136:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _247:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _358:%.* = load <32 x i8>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.b(<16 x i16> _136, <32 x i8> _247, <32 x i8> _358) ++// CHECK-NEXT: store <16 x i16> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvmaddwod_h_b(v16i16 _1, v32i8 _2, v32i8 _3) { return __lasx_xvmaddwod_h_b(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwod_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*, <4 x i64> _3:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _3:%.* = load <4 x i64>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du(<4 x i64> _1, <4 x i64> _2, <4 x i64> _3) ++// CHECK-NEXT: store <4 x i64> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4u64 xvmaddwod_q_du(v4u64 _1, v4u64 _2, v4u64 _3) { return __lasx_xvmaddwod_q_du(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwod_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu(<4 x i64> _1:%.*, <8 x i32> _2:%.*, <8 x i32> _3:%.*) +-// 
CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _346:%.* = load <8 x i32>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu(<4 x i64> _1, <8 x i32> _235, <8 x i32> _346) ++// CHECK-NEXT: store <4 x i64> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4u64 xvmaddwod_d_wu(v4u64 _1, v8u32 _2, v8u32 _3) { return __lasx_xvmaddwod_d_wu(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwod_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu(<8 x i32> _1:%.*, <16 x i16> _2:%.*, <16 x i16> _3:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _136:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _247:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _358:%.* = load <16 x i16>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu(<8 x i32> _136, <16 x i16> _247, <16 x i16> _358) ++// CHECK-NEXT: store <8 x i32> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8u32 xvmaddwod_w_hu(v8u32 _1, v16u16 _2, v16u16 _3) { return __lasx_xvmaddwod_w_hu(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwod_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu(<16 x i16> _1:%.*, <32 x i8> _2:%.*, <32 x i8> _3:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _136:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _247:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _358:%.* = load <32 x i8>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu(<16 x i16> _136, <32 x i8> _247, <32 x i8> _358) ++// CHECK-NEXT: store <16 x i16> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16u16 xvmaddwod_h_bu(v16u16 _1, v32u8 _2, v32u8 _3) { return __lasx_xvmaddwod_h_bu(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwev_q_du_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*, <4 x i64> _3:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _3:%.* = load <4 x i64>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du.d(<4 x i64> _1, <4 x i64> _2, <4 x i64> _3) ++// CHECK-NEXT: store <4 x i64> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvmaddwev_q_du_d(v4i64 _1, v4u64 _2, v4i64 _3) { return __lasx_xvmaddwev_q_du_d(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwev_d_wu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu.w(<4 x i64> _1:%.*, <8 x i32> _2:%.*, <8 x i32> _3:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _346:%.* = load <8 x i32>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// 
CHECK-NEXT: TMP3:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu.w(<4 x i64> _1, <8 x i32> _235, <8 x i32> _346) ++// CHECK-NEXT: store <4 x i64> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvmaddwev_d_wu_w(v4i64 _1, v8u32 _2, v8i32 _3) { return __lasx_xvmaddwev_d_wu_w(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwev_w_hu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu.h(<8 x i32> _1:%.*, <16 x i16> _2:%.*, <16 x i16> _3:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _136:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _247:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _358:%.* = load <16 x i16>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu.h(<8 x i32> _136, <16 x i16> _247, <16 x i16> _358) ++// CHECK-NEXT: store <8 x i32> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvmaddwev_w_hu_h(v8i32 _1, v16u16 _2, v16i16 _3) { return __lasx_xvmaddwev_w_hu_h(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwev_h_bu_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu.b(<16 x i16> _1:%.*, <32 x i8> _2:%.*, <32 x i8> _3:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _136:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _247:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _358:%.* = load <32 x i8>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu.b(<16 x i16> _136, <32 x i8> _247, <32 x i8> _358) ++// CHECK-NEXT: store <16 x i16> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvmaddwev_h_bu_b(v16i16 _1, v32u8 _2, v32i8 _3) { return __lasx_xvmaddwev_h_bu_b(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwod_q_du_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*, <4 x i64> _3:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _3:%.* = load <4 x i64>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du.d(<4 x i64> _1, <4 x i64> _2, <4 x i64> _3) ++// CHECK-NEXT: store <4 x i64> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvmaddwod_q_du_d(v4i64 _1, v4u64 _2, v4i64 _3) { return __lasx_xvmaddwod_q_du_d(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwod_d_wu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu.w(<4 x i64> _1:%.*, <8 x i32> _2:%.*, <8 x i32> _3:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _346:%.* = load <8 x i32>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu.w(<4 x i64> _1, <8 x i32> _235, <8 x i32> _346) ++// CHECK-NEXT: store <4 x i64> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 
xvmaddwod_d_wu_w(v4i64 _1, v8u32 _2, v8i32 _3) { return __lasx_xvmaddwod_d_wu_w(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwod_w_hu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu.h(<8 x i32> _1:%.*, <16 x i16> _2:%.*, <16 x i16> _3:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _136:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _247:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _358:%.* = load <16 x i16>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu.h(<8 x i32> _136, <16 x i16> _247, <16 x i16> _358) ++// CHECK-NEXT: store <8 x i32> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvmaddwod_w_hu_h(v8i32 _1, v16u16 _2, v16i16 _3) { return __lasx_xvmaddwod_w_hu_h(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwod_h_bu_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu.b(<16 x i16> _1:%.*, <32 x i8> _2:%.*, <32 x i8> _3:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _136:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _247:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _358:%.* = load <32 x i8>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu.b(<16 x i16> _136, <32 x i8> _247, <32 x i8> _358) ++// CHECK-NEXT: store <16 x i16> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvmaddwod_h_bu_b(v16i16 _1, v32u8 _2, v32i8 _3) { return __lasx_xvmaddwod_h_bu_b(_1, _2, _3); } + // CHECK-LABEL: @xvrotr_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvrotr.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _124:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvrotr.b(<32 x i8> _124, <32 x i8> _235) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvrotr_b(v32i8 _1, v32i8 _2) { return __lasx_xvrotr_b(_1, _2); } + // CHECK-LABEL: @xvrotr_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvrotr.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _124:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvrotr.h(<16 x i16> _124, <16 x i16> _235) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvrotr_h(v16i16 _1, v16i16 _2) { return __lasx_xvrotr_h(_1, _2); } + // CHECK-LABEL: @xvrotr_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvrotr.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _124:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _235:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvrotr.w(<8 x i32> _124, <8 x 
i32> _235) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvrotr_w(v8i32 _1, v8i32 _2) { return __lasx_xvrotr_w(_1, _2); } + // CHECK-LABEL: @xvrotr_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvrotr.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvrotr.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvrotr_d(v4i64 _1, v4i64 _2) { return __lasx_xvrotr_d(_1, _2); } + // CHECK-LABEL: @xvadd_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvadd.q(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvadd.q(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvadd_q(v4i64 _1, v4i64 _2) { return __lasx_xvadd_q(_1, _2); } + // CHECK-LABEL: @xvsub_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsub.q(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsub.q(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvsub_q(v4i64 _1, v4i64 _2) { return __lasx_xvsub_q(_1, _2); } + // CHECK-LABEL: @xvaddwev_q_du_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvaddwev_q_du_d(v4u64 _1, v4i64 _2) { return __lasx_xvaddwev_q_du_d(_1, _2); } + // CHECK-LABEL: @xvaddwod_q_du_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvaddwod_q_du_d(v4u64 _1, v4i64 _2) { return __lasx_xvaddwod_q_du_d(_1, _2); } + // CHECK-LABEL: @xvmulwev_q_du_d( + // 
CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvmulwev_q_du_d(v4u64 _1, v4i64 _2) { return __lasx_xvmulwev_q_du_d(_1, _2); } + // CHECK-LABEL: @xvmulwod_q_du_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvmulwod_q_du_d(v4u64 _1, v4i64 _2) { return __lasx_xvmulwod_q_du_d(_1, _2); } + // CHECK-LABEL: @xvmskgez_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmskgez.b(<32 x i8> _1:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _112:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmskgez.b(<32 x i8> _112) ++// CHECK-NEXT: store <32 x i8> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvmskgez_b(v32i8 _1) { return __lasx_xvmskgez_b(_1); } + // CHECK-LABEL: @xvmsknz_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmsknz.b(<32 x i8> _1:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _112:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmsknz.b(<32 x i8> _112) ++// CHECK-NEXT: store <32 x i8> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvmsknz_b(v32i8 _1) { return __lasx_xvmsknz_b(_1); } + // CHECK-LABEL: @xvexth_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvexth.h.b(<32 x i8> _1:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _112:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvexth.h.b(<32 x i8> _112) ++// CHECK-NEXT: store <16 x i16> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvexth_h_b(v32i8 _1) { return __lasx_xvexth_h_b(_1); } + // CHECK-LABEL: @xvexth_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvexth.w.h(<16 x i16> _1:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _112:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvexth.w.h(<16 x i16> _112) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvexth_w_h(v16i16 _1) { return __lasx_xvexth_w_h(_1); } + // CHECK-LABEL: @xvexth_d_w( + // CHECK-NEXT: entry: +-// 
CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.d.w(<8 x i32> _1:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _112:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.d.w(<8 x i32> _112) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvexth_d_w(v8i32 _1) { return __lasx_xvexth_d_w(_1); } + // CHECK-LABEL: @xvexth_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.q.d(<4 x i64> _1:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.q.d(<4 x i64> _1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvexth_q_d(v4i64 _1) { return __lasx_xvexth_q_d(_1); } + // CHECK-LABEL: @xvexth_hu_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvexth.hu.bu(<32 x i8> _1:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _112:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvexth.hu.bu(<32 x i8> _112) ++// CHECK-NEXT: store <16 x i16> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16u16 xvexth_hu_bu(v32u8 _1) { return __lasx_xvexth_hu_bu(_1); } + // CHECK-LABEL: @xvexth_wu_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvexth.wu.hu(<16 x i16> _1:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _112:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvexth.wu.hu(<16 x i16> _112) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8u32 xvexth_wu_hu(v16u16 _1) { return __lasx_xvexth_wu_hu(_1); } + // CHECK-LABEL: @xvexth_du_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.du.wu(<8 x i32> _1:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _112:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.du.wu(<8 x i32> _112) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4u64 xvexth_du_wu(v8u32 _1) { return __lasx_xvexth_du_wu(_1); } + // CHECK-LABEL: @xvexth_qu_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.qu.du(<4 x i64> _1:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.qu.du(<4 x i64> _1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4u64 xvexth_qu_du(v4u64 _1) { return __lasx_xvexth_qu_du(_1); } + // CHECK-LABEL: @xvrotri_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvrotri.b(<32 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <32 x i8> 
@llvm.loongarch.lasx.xvrotri.b(<32 x i8> _1, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvrotri_b(v32i8 _1) { return __lasx_xvrotri_b(_1, 1); } + // CHECK-LABEL: @xvrotri_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvrotri.h(<16 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvrotri.h(<16 x i16> _1, i32 1) ++// CHECK-NEXT: store <16 x i16> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvrotri_h(v16i16 _1) { return __lasx_xvrotri_h(_1, 1); } + // CHECK-LABEL: @xvrotri_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvrotri.w(<8 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvrotri.w(<8 x i32> _1, i32 1) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvrotri_w(v8i32 _1) { return __lasx_xvrotri_w(_1, 1); } + // CHECK-LABEL: @xvrotri_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvrotri.d(<4 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvrotri.d(<4 x i64> _1, i32 1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvrotri_d(v4i64 _1) { return __lasx_xvrotri_d(_1, 1); } + // CHECK-LABEL: @xvextl_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvextl.q.d(<4 x i64> _1:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvextl.q.d(<4 x i64> _1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvextl_q_d(v4i64 _1) { return __lasx_xvextl_q_d(_1); } + // CHECK-LABEL: @xvsrlni_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h(<32 x i8> _1:%.*, <32 x i8> _2:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h(<32 x i8> _1, <32 x i8> _2, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvsrlni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvsrlni_b_h(_1, _2, 1); } + // CHECK-LABEL: @xvsrlni_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w(<16 x i16> _1:%.*, <16 x i16> _2:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> 
@llvm.loongarch.lasx.xvsrlni.h.w(<16 x i16> _1, <16 x i16> _2, i32 1) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvsrlni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvsrlni_h_w(_1, _2, 1); } + // CHECK-LABEL: @xvsrlni_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d(<8 x i32> _1:%.*, <8 x i32> _2:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d(<8 x i32> _1, <8 x i32> _2, i32 1) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvsrlni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvsrlni_w_d(_1, _2, 1); } + // CHECK-LABEL: @xvsrlni_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q(<4 x i64> _1:%.*, <4 x i64> _2:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q(<4 x i64> _1, <4 x i64> _2, i32 1) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvsrlni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvsrlni_d_q(_1, _2, 1); } + // CHECK-LABEL: @xvsrlrni_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h(<32 x i8> _1:%.*, <32 x i8> _2:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h(<32 x i8> _1, <32 x i8> _2, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvsrlrni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvsrlrni_b_h(_1, _2, 1); } + // CHECK-LABEL: @xvsrlrni_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w(<16 x i16> _1:%.*, <16 x i16> _2:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w(<16 x i16> _1, <16 x i16> _2, i32 1) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvsrlrni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvsrlrni_h_w(_1, _2, 1); } + // CHECK-LABEL: @xvsrlrni_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d(<8 x i32> _1:%.*, <8 x i32> _2:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d(<8 x i32> _1, <8 x i32> _2, i32 1) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr 
AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvsrlrni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvsrlrni_w_d(_1, _2, 1); } + // CHECK-LABEL: @xvsrlrni_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q(<4 x i64> _1:%.*, <4 x i64> _2:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q(<4 x i64> _1, <4 x i64> _2, i32 1) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvsrlrni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvsrlrni_d_q(_1, _2, 1); } + // CHECK-LABEL: @xvssrlni_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8> _1:%.*, <32 x i8> _2:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8> _1, <32 x i8> _2, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvssrlni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvssrlni_b_h(_1, _2, 1); } + // CHECK-LABEL: @xvssrlni_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w(<16 x i16> _1:%.*, <16 x i16> _2:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w(<16 x i16> _1, <16 x i16> _2, i32 1) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvssrlni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvssrlni_h_w(_1, _2, 1); } + // CHECK-LABEL: @xvssrlni_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d(<8 x i32> _1:%.*, <8 x i32> _2:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d(<8 x i32> _1, <8 x i32> _2, i32 1) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvssrlni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvssrlni_w_d(_1, _2, 1); } + // CHECK-LABEL: @xvssrlni_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q(<4 x i64> _1:%.*, <4 x i64> _2:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q(<4 x i64> _1, <4 x i64> _2, i32 1) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvssrlni_d_q(v4i64 _1, v4i64 
_2) { return __lasx_xvssrlni_d_q(_1, _2, 1); } + // CHECK-LABEL: @xvssrlni_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h(<32 x i8> _1:%.*, <32 x i8> _2:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h(<32 x i8> _1, <32 x i8> _2, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32u8 xvssrlni_bu_h(v32u8 _1, v32i8 _2) { return __lasx_xvssrlni_bu_h(_1, _2, 1); } + // CHECK-LABEL: @xvssrlni_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w(<16 x i16> _1:%.*, <16 x i16> _2:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w(<16 x i16> _1, <16 x i16> _2, i32 1) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16u16 xvssrlni_hu_w(v16u16 _1, v16i16 _2) { return __lasx_xvssrlni_hu_w(_1, _2, 1); } + // CHECK-LABEL: @xvssrlni_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d(<8 x i32> _1:%.*, <8 x i32> _2:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d(<8 x i32> _1, <8 x i32> _2, i32 1) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8u32 xvssrlni_wu_d(v8u32 _1, v8i32 _2) { return __lasx_xvssrlni_wu_d(_1, _2, 1); } + // CHECK-LABEL: @xvssrlni_du_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q(<4 x i64> _1:%.*, <4 x i64> _2:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q(<4 x i64> _1, <4 x i64> _2, i32 1) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4u64 xvssrlni_du_q(v4u64 _1, v4i64 _2) { return __lasx_xvssrlni_du_q(_1, _2, 1); } + // CHECK-LABEL: @xvssrlrni_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h(<32 x i8> _1:%.*, <32 x i8> _2:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h(<32 x i8> _1, <32 x i8> _2, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvssrlrni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvssrlrni_b_h(_1, _2, 1); } + // CHECK-LABEL: @xvssrlrni_h_w( 
+ // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w(<16 x i16> _1:%.*, <16 x i16> _2:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w(<16 x i16> _1, <16 x i16> _2, i32 1) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvssrlrni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvssrlrni_h_w(_1, _2, 1); } + // CHECK-LABEL: @xvssrlrni_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d(<8 x i32> _1:%.*, <8 x i32> _2:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d(<8 x i32> _1, <8 x i32> _2, i32 1) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvssrlrni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvssrlrni_w_d(_1, _2, 1); } + // CHECK-LABEL: @xvssrlrni_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.d.q(<4 x i64> _1:%.*, <4 x i64> _2:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.d.q(<4 x i64> _1, <4 x i64> _2, i32 1) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvssrlrni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvssrlrni_d_q(_1, _2, 1); } + // CHECK-LABEL: @xvssrlrni_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h(<32 x i8> _1:%.*, <32 x i8> _2:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h(<32 x i8> _1, <32 x i8> _2, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32u8 xvssrlrni_bu_h(v32u8 _1, v32i8 _2) { return __lasx_xvssrlrni_bu_h(_1, _2, 1); } + // CHECK-LABEL: @xvssrlrni_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w(<16 x i16> _1:%.*, <16 x i16> _2:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w(<16 x i16> _1, <16 x i16> _2, i32 1) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16u16 xvssrlrni_hu_w(v16u16 _1, v16i16 _2) { return __lasx_xvssrlrni_hu_w(_1, _2, 1); } + // CHECK-LABEL: @xvssrlrni_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail 
call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d(<8 x i32> _1:%.*, <8 x i32> _2:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d(<8 x i32> _1, <8 x i32> _2, i32 1) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8u32 xvssrlrni_wu_d(v8u32 _1, v8i32 _2) { return __lasx_xvssrlrni_wu_d(_1, _2, 1); } + // CHECK-LABEL: @xvssrlrni_du_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q(<4 x i64> _1:%.*, <4 x i64> _2:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q(<4 x i64> _1, <4 x i64> _2, i32 1) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4u64 xvssrlrni_du_q(v4u64 _1, v4i64 _2) { return __lasx_xvssrlrni_du_q(_1, _2, 1); } + // CHECK-LABEL: @xvsrani_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h(<32 x i8> _1:%.*, <32 x i8> _2:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h(<32 x i8> _1, <32 x i8> _2, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvsrani_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvsrani_b_h(_1, _2, 1); } + // CHECK-LABEL: @xvsrani_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w(<16 x i16> _1:%.*, <16 x i16> _2:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w(<16 x i16> _1, <16 x i16> _2, i32 1) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvsrani_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvsrani_h_w(_1, _2, 1); } + // CHECK-LABEL: @xvsrani_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d(<8 x i32> _1:%.*, <8 x i32> _2:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d(<8 x i32> _1, <8 x i32> _2, i32 1) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvsrani_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvsrani_w_d(_1, _2, 1); } + // CHECK-LABEL: @xvsrani_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q(<4 x i64> _1:%.*, <4 x i64> _2:%.*, i32 1) +-// 
CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q(<4 x i64> _1, <4 x i64> _2, i32 1) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvsrani_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvsrani_d_q(_1, _2, 1); } + // CHECK-LABEL: @xvsrarni_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h(<32 x i8> _1:%.*, <32 x i8> _2:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h(<32 x i8> _1, <32 x i8> _2, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvsrarni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvsrarni_b_h(_1, _2, 1); } + // CHECK-LABEL: @xvsrarni_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w(<16 x i16> _1:%.*, <16 x i16> _2:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w(<16 x i16> _1, <16 x i16> _2, i32 1) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvsrarni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvsrarni_h_w(_1, _2, 1); } + // CHECK-LABEL: @xvsrarni_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d(<8 x i32> _1:%.*, <8 x i32> _2:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d(<8 x i32> _1, <8 x i32> _2, i32 1) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvsrarni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvsrarni_w_d(_1, _2, 1); } + // CHECK-LABEL: @xvsrarni_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q(<4 x i64> _1:%.*, <4 x i64> _2:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q(<4 x i64> _1, <4 x i64> _2, i32 1) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvsrarni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvsrarni_d_q(_1, _2, 1); } + // CHECK-LABEL: @xvssrani_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h(<32 x i8> _1:%.*, <32 x i8> _2:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 
++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h(<32 x i8> _1, <32 x i8> _2, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvssrani_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvssrani_b_h(_1, _2, 1); } + // CHECK-LABEL: @xvssrani_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssrani.h.w(<16 x i16> _1:%.*, <16 x i16> _2:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssrani.h.w(<16 x i16> _1, <16 x i16> _2, i32 1) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvssrani_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvssrani_h_w(_1, _2, 1); } + // CHECK-LABEL: @xvssrani_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d(<8 x i32> _1:%.*, <8 x i32> _2:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d(<8 x i32> _1, <8 x i32> _2, i32 1) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvssrani_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvssrani_w_d(_1, _2, 1); } + // CHECK-LABEL: @xvssrani_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q(<4 x i64> _1:%.*, <4 x i64> _2:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q(<4 x i64> _1, <4 x i64> _2, i32 1) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvssrani_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvssrani_d_q(_1, _2, 1); } + // CHECK-LABEL: @xvssrani_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h(<32 x i8> _1:%.*, <32 x i8> _2:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h(<32 x i8> _1, <32 x i8> _2, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32u8 xvssrani_bu_h(v32u8 _1, v32i8 _2) { return __lasx_xvssrani_bu_h(_1, _2, 1); } + // CHECK-LABEL: @xvssrani_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w(<16 x i16> _1:%.*, <16 x i16> _2:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// 
CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w(<16 x i16> _1, <16 x i16> _2, i32 1) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16u16 xvssrani_hu_w(v16u16 _1, v16i16 _2) { return __lasx_xvssrani_hu_w(_1, _2, 1); } + // CHECK-LABEL: @xvssrani_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d(<8 x i32> _1:%.*, <8 x i32> _2:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d(<8 x i32> _1, <8 x i32> _2, i32 1) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8u32 xvssrani_wu_d(v8u32 _1, v8i32 _2) { return __lasx_xvssrani_wu_d(_1, _2, 1); } + // CHECK-LABEL: @xvssrani_du_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvssrani.du.q(<4 x i64> _1:%.*, <4 x i64> _2:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvssrani.du.q(<4 x i64> _1, <4 x i64> _2, i32 1) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4u64 xvssrani_du_q(v4u64 _1, v4i64 _2) { return __lasx_xvssrani_du_q(_1, _2, 1); } + // CHECK-LABEL: @xvssrarni_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h(<32 x i8> _1:%.*, <32 x i8> _2:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h(<32 x i8> _1, <32 x i8> _2, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvssrarni_b_h(v32i8 _1, v32i8 _2) { return __lasx_xvssrarni_b_h(_1, _2, 1); } + // CHECK-LABEL: @xvssrarni_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w(<16 x i16> _1:%.*, <16 x i16> _2:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w(<16 x i16> _1, <16 x i16> _2, i32 1) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvssrarni_h_w(v16i16 _1, v16i16 _2) { return __lasx_xvssrarni_h_w(_1, _2, 1); } + // CHECK-LABEL: @xvssrarni_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarni.w.d(<8 x i32> _1:%.*, <8 x i32> _2:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> 
@llvm.loongarch.lasx.xvssrarni.w.d(<8 x i32> _1, <8 x i32> _2, i32 1) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvssrarni_w_d(v8i32 _1, v8i32 _2) { return __lasx_xvssrarni_w_d(_1, _2, 1); } + // CHECK-LABEL: @xvssrarni_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q(<4 x i64> _1:%.*, <4 x i64> _2:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q(<4 x i64> _1, <4 x i64> _2, i32 1) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvssrarni_d_q(v4i64 _1, v4i64 _2) { return __lasx_xvssrarni_d_q(_1, _2, 1); } + // CHECK-LABEL: @xvssrarni_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h(<32 x i8> _1:%.*, <32 x i8> _2:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h(<32 x i8> _1, <32 x i8> _2, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32u8 xvssrarni_bu_h(v32u8 _1, v32i8 _2) { return __lasx_xvssrarni_bu_h(_1, _2, 1); } + // CHECK-LABEL: @xvssrarni_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w(<16 x i16> _1:%.*, <16 x i16> _2:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w(<16 x i16> _1, <16 x i16> _2, i32 1) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16u16 xvssrarni_hu_w(v16u16 _1, v16i16 _2) { return __lasx_xvssrarni_hu_w(_1, _2, 1); } + // CHECK-LABEL: @xvssrarni_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d(<8 x i32> _1:%.*, <8 x i32> _2:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d(<8 x i32> _1, <8 x i32> _2, i32 1) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8u32 xvssrarni_wu_d(v8u32 _1, v8i32 _2) { return __lasx_xvssrarni_wu_d(_1, _2, 1); } + // CHECK-LABEL: @xvssrarni_du_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q(<4 x i64> _1:%.*, <4 x i64> _2:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q(<4 x i64> _1, <4 x i64> _2, 
i32 1) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4u64 xvssrarni_du_q(v4u64 _1, v4i64 _2) { return __lasx_xvssrarni_du_q(_1, _2, 1); } + // CHECK-LABEL: @xbnz_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lasx.xbnz.b(<32 x i8> _1:%.*) +-// CHECK-NEXT: ret i32 TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call i32 @llvm.loongarch.lasx.xbnz.b(<32 x i8> _1) ++// CHECK-NEXT: ret i32 TMP1 + // + int xbnz_b(v32u8 _1) { return __lasx_xbnz_b(_1); } + // CHECK-LABEL: @xbnz_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lasx.xbnz.d(<4 x i64> _1:%.*) +-// CHECK-NEXT: ret i32 TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call i32 @llvm.loongarch.lasx.xbnz.d(<4 x i64> _1) ++// CHECK-NEXT: ret i32 TMP1 + // + int xbnz_d(v4u64 _1) { return __lasx_xbnz_d(_1); } + // CHECK-LABEL: @xbnz_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lasx.xbnz.h(<16 x i16> _1:%.*) +-// CHECK-NEXT: ret i32 TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call i32 @llvm.loongarch.lasx.xbnz.h(<16 x i16> _1) ++// CHECK-NEXT: ret i32 TMP1 + // + int xbnz_h(v16u16 _1) { return __lasx_xbnz_h(_1); } + // CHECK-LABEL: @xbnz_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lasx.xbnz.v(<32 x i8> _1:%.*) +-// CHECK-NEXT: ret i32 TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call i32 @llvm.loongarch.lasx.xbnz.v(<32 x i8> _1) ++// CHECK-NEXT: ret i32 TMP1 + // + int xbnz_v(v32u8 _1) { return __lasx_xbnz_v(_1); } + // CHECK-LABEL: @xbnz_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lasx.xbnz.w(<8 x i32> _1:%.*) +-// CHECK-NEXT: ret i32 TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call i32 @llvm.loongarch.lasx.xbnz.w(<8 x i32> _1) ++// CHECK-NEXT: ret i32 TMP1 + // + int xbnz_w(v8u32 _1) { return __lasx_xbnz_w(_1); } + // CHECK-LABEL: @xbz_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lasx.xbz.b(<32 x i8> _1:%.*) +-// CHECK-NEXT: ret i32 TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call i32 @llvm.loongarch.lasx.xbz.b(<32 x i8> _1) ++// CHECK-NEXT: ret i32 TMP1 + // + int xbz_b(v32u8 _1) { return __lasx_xbz_b(_1); } + // CHECK-LABEL: @xbz_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lasx.xbz.d(<4 x i64> _1:%.*) +-// CHECK-NEXT: ret i32 TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call i32 @llvm.loongarch.lasx.xbz.d(<4 x i64> _1) ++// CHECK-NEXT: ret i32 TMP1 + // + int xbz_d(v4u64 _1) { return __lasx_xbz_d(_1); } + // CHECK-LABEL: @xbz_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lasx.xbz.h(<16 x i16> _1:%.*) +-// CHECK-NEXT: ret i32 TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call i32 @llvm.loongarch.lasx.xbz.h(<16 x i16> _1) ++// CHECK-NEXT: ret i32 TMP1 + // + int xbz_h(v16u16 _1) { return __lasx_xbz_h(_1); } + 
// CHECK-LABEL: @xbz_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lasx.xbz.v(<32 x i8> _1:%.*) +-// CHECK-NEXT: ret i32 TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call i32 @llvm.loongarch.lasx.xbz.v(<32 x i8> _1) ++// CHECK-NEXT: ret i32 TMP1 + // + int xbz_v(v32u8 _1) { return __lasx_xbz_v(_1); } + // CHECK-LABEL: @xbz_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lasx.xbz.w(<8 x i32> _1:%.*) +-// CHECK-NEXT: ret i32 TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call i32 @llvm.loongarch.lasx.xbz.w(<8 x i32> _1) ++// CHECK-NEXT: ret i32 TMP1 + // + int xbz_w(v8u32 _1) { return __lasx_xbz_w(_1); } + // CHECK-LABEL: @xvfcmp_caf_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.caf.d(<4 x double> _1:%.*, <4 x double> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x double>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.caf.d(<4 x double> _1, <4 x double> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_caf_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_caf_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_caf_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.caf.s(<8 x float> _1:%.*, <8 x float> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x float>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.caf.s(<8 x float> _1, <8 x float> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_caf_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_caf_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_ceq_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.ceq.d(<4 x double> _1:%.*, <4 x double> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x double>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.ceq.d(<4 x double> _1, <4 x double> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_ceq_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_ceq_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_ceq_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.ceq.s(<8 x float> _1:%.*, <8 x float> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x float>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.ceq.s(<8 x float> _1, <8 x float> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_ceq_s(v8f32 _1, v8f32 _2) { return 
__lasx_xvfcmp_ceq_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_cle_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cle.d(<4 x double> _1:%.*, <4 x double> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x double>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cle.d(<4 x double> _1, <4 x double> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_cle_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cle_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_cle_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cle.s(<8 x float> _1:%.*, <8 x float> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x float>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cle.s(<8 x float> _1, <8 x float> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_cle_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cle_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_clt_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.clt.d(<4 x double> _1:%.*, <4 x double> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x double>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.clt.d(<4 x double> _1, <4 x double> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_clt_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_clt_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_clt_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.clt.s(<8 x float> _1:%.*, <8 x float> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x float>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.clt.s(<8 x float> _1, <8 x float> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_clt_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_clt_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_cne_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cne.d(<4 x double> _1:%.*, <4 x double> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x double>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cne.d(<4 x double> _1, <4 x double> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_cne_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cne_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_cne_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call 
<8 x i32> @llvm.loongarch.lasx.xvfcmp.cne.s(<8 x float> _1:%.*, <8 x float> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x float>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cne.s(<8 x float> _1, <8 x float> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_cne_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cne_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_cor_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cor.d(<4 x double> _1:%.*, <4 x double> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x double>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cor.d(<4 x double> _1, <4 x double> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_cor_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cor_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_cor_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cor.s(<8 x float> _1:%.*, <8 x float> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x float>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cor.s(<8 x float> _1, <8 x float> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_cor_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cor_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_cueq_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cueq.d(<4 x double> _1:%.*, <4 x double> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x double>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cueq.d(<4 x double> _1, <4 x double> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_cueq_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cueq_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_cueq_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cueq.s(<8 x float> _1:%.*, <8 x float> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x float>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cueq.s(<8 x float> _1, <8 x float> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_cueq_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cueq_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_cule_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cule.d(<4 x double> _1:%.*, <4 x double> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> 
TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x double>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cule.d(<4 x double> _1, <4 x double> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_cule_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cule_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_cule_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cule.s(<8 x float> _1:%.*, <8 x float> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x float>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cule.s(<8 x float> _1, <8 x float> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_cule_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cule_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_cult_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cult.d(<4 x double> _1:%.*, <4 x double> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x double>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cult.d(<4 x double> _1, <4 x double> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_cult_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cult_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_cult_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cult.s(<8 x float> _1:%.*, <8 x float> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x float>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cult.s(<8 x float> _1, <8 x float> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_cult_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cult_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_cun_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cun.d(<4 x double> _1:%.*, <4 x double> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x double>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cun.d(<4 x double> _1, <4 x double> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_cun_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cun_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_cune_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cune.d(<4 x double> _1:%.*, <4 x double> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: 
_2:%.* = load <4 x double>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cune.d(<4 x double> _1, <4 x double> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_cune_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_cune_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_cune_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cune.s(<8 x float> _1:%.*, <8 x float> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x float>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cune.s(<8 x float> _1, <8 x float> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_cune_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cune_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_cun_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cun.s(<8 x float> _1:%.*, <8 x float> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x float>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cun.s(<8 x float> _1, <8 x float> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_cun_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_cun_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_saf_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.saf.d(<4 x double> _1:%.*, <4 x double> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x double>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.saf.d(<4 x double> _1, <4 x double> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_saf_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_saf_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_saf_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.saf.s(<8 x float> _1:%.*, <8 x float> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x float>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.saf.s(<8 x float> _1, <8 x float> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_saf_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_saf_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_seq_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.seq.d(<4 x double> _1:%.*, <4 x double> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x double>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> 
@llvm.loongarch.lasx.xvfcmp.seq.d(<4 x double> _1, <4 x double> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_seq_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_seq_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_seq_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.seq.s(<8 x float> _1:%.*, <8 x float> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x float>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.seq.s(<8 x float> _1, <8 x float> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_seq_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_seq_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_sle_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sle.d(<4 x double> _1:%.*, <4 x double> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x double>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sle.d(<4 x double> _1, <4 x double> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_sle_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sle_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_sle_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sle.s(<8 x float> _1:%.*, <8 x float> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x float>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sle.s(<8 x float> _1, <8 x float> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_sle_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sle_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_slt_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.slt.d(<4 x double> _1:%.*, <4 x double> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x double>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.slt.d(<4 x double> _1, <4 x double> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_slt_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_slt_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_slt_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.slt.s(<8 x float> _1:%.*, <8 x float> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x float>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.slt.s(<8 x float> _1, <8 x float> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 
32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_slt_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_slt_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_sne_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sne.d(<4 x double> _1:%.*, <4 x double> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x double>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sne.d(<4 x double> _1, <4 x double> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_sne_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sne_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_sne_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sne.s(<8 x float> _1:%.*, <8 x float> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x float>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sne.s(<8 x float> _1, <8 x float> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_sne_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sne_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_sor_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sor.d(<4 x double> _1:%.*, <4 x double> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x double>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sor.d(<4 x double> _1, <4 x double> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_sor_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sor_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_sor_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sor.s(<8 x float> _1:%.*, <8 x float> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x float>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sor.s(<8 x float> _1, <8 x float> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_sor_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sor_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_sueq_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sueq.d(<4 x double> _1:%.*, <4 x double> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x double>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sueq.d(<4 x double> _1, <4 x double> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_sueq_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sueq_d(_1, 
_2); } + // CHECK-LABEL: @xvfcmp_sueq_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sueq.s(<8 x float> _1:%.*, <8 x float> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x float>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sueq.s(<8 x float> _1, <8 x float> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_sueq_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sueq_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_sule_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sule.d(<4 x double> _1:%.*, <4 x double> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x double>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sule.d(<4 x double> _1, <4 x double> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_sule_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sule_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_sule_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sule.s(<8 x float> _1:%.*, <8 x float> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x float>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sule.s(<8 x float> _1, <8 x float> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_sule_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sule_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_sult_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sult.d(<4 x double> _1:%.*, <4 x double> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x double>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sult.d(<4 x double> _1, <4 x double> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_sult_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sult_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_sult_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sult.s(<8 x float> _1:%.*, <8 x float> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x float>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sult.s(<8 x float> _1, <8 x float> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_sult_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sult_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_sun_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x 
i64> @llvm.loongarch.lasx.xvfcmp.sun.d(<4 x double> _1:%.*, <4 x double> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x double>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sun.d(<4 x double> _1, <4 x double> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_sun_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sun_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_sune_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sune.d(<4 x double> _1:%.*, <4 x double> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x double>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sune.d(<4 x double> _1, <4 x double> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_sune_d(v4f64 _1, v4f64 _2) { return __lasx_xvfcmp_sune_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_sune_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sune.s(<8 x float> _1:%.*, <8 x float> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x float>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sune.s(<8 x float> _1, <8 x float> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_sune_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sune_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_sun_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sun.s(<8 x float> _1:%.*, <8 x float> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x float>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sun.s(<8 x float> _1, <8 x float> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_sun_s(v8f32 _1, v8f32 _2) { return __lasx_xvfcmp_sun_s(_1, _2); } + // CHECK-LABEL: @xvpickve_d_f( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvpickve.d.f(<4 x double> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x double> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvpickve.d.f(<4 x double> _1, i32 1) ++// CHECK-NEXT: store <4 x double> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4f64 xvpickve_d_f(v4f64 _1) { return __lasx_xvpickve_d_f(_1, 1); } + // CHECK-LABEL: @xvpickve_w_f( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvpickve.w.f(<8 x float> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x float> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail 
call <8 x float> @llvm.loongarch.lasx.xvpickve.w.f(<8 x float> _1, i32 1) ++// CHECK-NEXT: store <8 x float> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8f32 xvpickve_w_f(v8f32 _1) { return __lasx_xvpickve_w_f(_1, 1); } + // CHECK-LABEL: @xvrepli_b( + // CHECK-NEXT: entry: + // CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvrepli.b(i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: store <32 x i8> TMP0, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvrepli_b() { return __lasx_xvrepli_b(1); } + // CHECK-LABEL: @xvrepli_d( + // CHECK-NEXT: entry: + // CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvrepli.d(i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: store <4 x i64> TMP0, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvrepli_d() { return __lasx_xvrepli_d(1); } + // CHECK-LABEL: @xvrepli_h( + // CHECK-NEXT: entry: + // CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvrepli.h(i32 1) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: store <16 x i16> TMP0, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvrepli_h() { return __lasx_xvrepli_h(1); } + // CHECK-LABEL: @xvrepli_w( + // CHECK-NEXT: entry: + // CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvrepli.w(i32 1) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: store <8 x i32> TMP0, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvrepli_w() { return __lasx_xvrepli_w(1); } +diff --git a/clang/test/CodeGen/LoongArch/lasx/builtin.c b/clang/test/CodeGen/LoongArch/lasx/builtin.c +index 0185f2004d52..f52a23a5faea 100644 +--- a/clang/test/CodeGen/LoongArch/lasx/builtin.c ++++ b/clang/test/CodeGen/LoongArch/lasx/builtin.c +@@ -27,4426 +27,6382 @@ typedef double v4f64_d __attribute__((vector_size(32), aligned(8))); + + // CHECK-LABEL: @xvsll_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsll.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2:!0-9+ ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsll.b(<32 x i8> _1, <32 x i8> _2) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvsll_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsll_b(_1, _2); } + // CHECK-LABEL: @xvsll_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsll.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsll.h(<16 x i16> _1, <16 x i16> _2) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvsll_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsll_h(_1, _2); } + // CHECK-LABEL: @xvsll_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsll.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 
32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsll.w(<8 x i32> _1, <8 x i32> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvsll_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsll_w(_1, _2); } + // CHECK-LABEL: @xvsll_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsll.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsll.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvsll_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsll_d(_1, _2); } + // CHECK-LABEL: @xvslli_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvslli.b(<32 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvslli.b(<32 x i8> _1, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvslli_b(v32i8 _1) { return __builtin_lasx_xvslli_b(_1, 1); } + // CHECK-LABEL: @xvslli_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvslli.h(<16 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvslli.h(<16 x i16> _1, i32 1) ++// CHECK-NEXT: store <16 x i16> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvslli_h(v16i16 _1) { return __builtin_lasx_xvslli_h(_1, 1); } + // CHECK-LABEL: @xvslli_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvslli.w(<8 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvslli.w(<8 x i32> _1, i32 1) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvslli_w(v8i32 _1) { return __builtin_lasx_xvslli_w(_1, 1); } + // CHECK-LABEL: @xvslli_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvslli.d(<4 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvslli.d(<4 x i64> _1, i32 1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvslli_d(v4i64 _1) { return __builtin_lasx_xvslli_d(_1, 1); } + // CHECK-LABEL: @xvsra_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsra.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: 
_2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsra.b(<32 x i8> _1, <32 x i8> _2) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvsra_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsra_b(_1, _2); } + // CHECK-LABEL: @xvsra_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsra.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsra.h(<16 x i16> _1, <16 x i16> _2) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvsra_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsra_h(_1, _2); } + // CHECK-LABEL: @xvsra_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsra.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsra.w(<8 x i32> _1, <8 x i32> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvsra_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsra_w(_1, _2); } + // CHECK-LABEL: @xvsra_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsra.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsra.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvsra_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsra_d(_1, _2); } + // CHECK-LABEL: @xvsrai_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsrai.b(<32 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsrai.b(<32 x i8> _1, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvsrai_b(v32i8 _1) { return __builtin_lasx_xvsrai_b(_1, 1); } + // CHECK-LABEL: @xvsrai_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsrai.h(<16 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsrai.h(<16 x i16> _1, i32 1) ++// CHECK-NEXT: store <16 x i16> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvsrai_h(v16i16 _1) { return __builtin_lasx_xvsrai_h(_1, 1); } + // CHECK-LABEL: @xvsrai_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> 
@llvm.loongarch.lasx.xvsrai.w(<8 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsrai.w(<8 x i32> _1, i32 1) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvsrai_w(v8i32 _1) { return __builtin_lasx_xvsrai_w(_1, 1); } + // CHECK-LABEL: @xvsrai_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsrai.d(<4 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsrai.d(<4 x i64> _1, i32 1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvsrai_d(v4i64 _1) { return __builtin_lasx_xvsrai_d(_1, 1); } + // CHECK-LABEL: @xvsrar_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsrar.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsrar.b(<32 x i8> _1, <32 x i8> _2) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvsrar_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrar_b(_1, _2); } + // CHECK-LABEL: @xvsrar_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsrar.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsrar.h(<16 x i16> _1, <16 x i16> _2) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvsrar_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrar_h(_1, _2); } + // CHECK-LABEL: @xvsrar_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsrar.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsrar.w(<8 x i32> _1, <8 x i32> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvsrar_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrar_w(_1, _2); } + // CHECK-LABEL: @xvsrar_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsrar.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsrar.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret 
void + // + v4i64 xvsrar_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrar_d(_1, _2); } + // CHECK-LABEL: @xvsrari_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsrari.b(<32 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsrari.b(<32 x i8> _1, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvsrari_b(v32i8 _1) { return __builtin_lasx_xvsrari_b(_1, 1); } + // CHECK-LABEL: @xvsrari_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsrari.h(<16 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsrari.h(<16 x i16> _1, i32 1) ++// CHECK-NEXT: store <16 x i16> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvsrari_h(v16i16 _1) { return __builtin_lasx_xvsrari_h(_1, 1); } + // CHECK-LABEL: @xvsrari_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsrari.w(<8 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsrari.w(<8 x i32> _1, i32 1) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvsrari_w(v8i32 _1) { return __builtin_lasx_xvsrari_w(_1, 1); } + // CHECK-LABEL: @xvsrari_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsrari.d(<4 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsrari.d(<4 x i64> _1, i32 1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvsrari_d(v4i64 _1) { return __builtin_lasx_xvsrari_d(_1, 1); } + // CHECK-LABEL: @xvsrl_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsrl.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsrl.b(<32 x i8> _1, <32 x i8> _2) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvsrl_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrl_b(_1, _2); } + // CHECK-LABEL: @xvsrl_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsrl.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsrl.h(<16 x i16> _1, <16 x i16> _2) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret 
void + // + v16i16 xvsrl_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrl_h(_1, _2); } + // CHECK-LABEL: @xvsrl_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsrl.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsrl.w(<8 x i32> _1, <8 x i32> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvsrl_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrl_w(_1, _2); } + // CHECK-LABEL: @xvsrl_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsrl.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsrl.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvsrl_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrl_d(_1, _2); } + // CHECK-LABEL: @xvsrli_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsrli.b(<32 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsrli.b(<32 x i8> _1, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvsrli_b(v32i8 _1) { return __builtin_lasx_xvsrli_b(_1, 1); } + // CHECK-LABEL: @xvsrli_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsrli.h(<16 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsrli.h(<16 x i16> _1, i32 1) ++// CHECK-NEXT: store <16 x i16> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvsrli_h(v16i16 _1) { return __builtin_lasx_xvsrli_h(_1, 1); } + // CHECK-LABEL: @xvsrli_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsrli.w(<8 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsrli.w(<8 x i32> _1, i32 1) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvsrli_w(v8i32 _1) { return __builtin_lasx_xvsrli_w(_1, 1); } + // CHECK-LABEL: @xvsrli_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsrli.d(<4 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsrli.d(<4 x i64> _1, i32 1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 
xvsrli_d(v4i64 _1) { return __builtin_lasx_xvsrli_d(_1, 1); } + // CHECK-LABEL: @xvsrlr_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlr.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlr.b(<32 x i8> _1, <32 x i8> _2) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvsrlr_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrlr_b(_1, _2); } + // CHECK-LABEL: @xvsrlr_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlr.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlr.h(<16 x i16> _1, <16 x i16> _2) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvsrlr_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrlr_h(_1, _2); } + // CHECK-LABEL: @xvsrlr_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlr.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlr.w(<8 x i32> _1, <8 x i32> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvsrlr_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrlr_w(_1, _2); } + // CHECK-LABEL: @xvsrlr_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlr.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlr.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvsrlr_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrlr_d(_1, _2); } + // CHECK-LABEL: @xvsrlri_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlri.b(<32 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlri.b(<32 x i8> _1, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvsrlri_b(v32i8 _1) { return __builtin_lasx_xvsrlri_b(_1, 1); } + // CHECK-LABEL: @xvsrlri_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlri.h(<16 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// 
CHECK-NEXT: TMP1:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlri.h(<16 x i16> _1, i32 1) ++// CHECK-NEXT: store <16 x i16> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvsrlri_h(v16i16 _1) { return __builtin_lasx_xvsrlri_h(_1, 1); } + // CHECK-LABEL: @xvsrlri_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlri.w(<8 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlri.w(<8 x i32> _1, i32 1) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvsrlri_w(v8i32 _1) { return __builtin_lasx_xvsrlri_w(_1, 1); } + // CHECK-LABEL: @xvsrlri_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlri.d(<4 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlri.d(<4 x i64> _1, i32 1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvsrlri_d(v4i64 _1) { return __builtin_lasx_xvsrlri_d(_1, 1); } + // CHECK-LABEL: @xvbitclr_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvbitclr.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvbitclr.b(<32 x i8> _1, <32 x i8> _2) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32u8 xvbitclr_b(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvbitclr_b(_1, _2); } + // CHECK-LABEL: @xvbitclr_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvbitclr.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvbitclr.h(<16 x i16> _1, <16 x i16> _2) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16u16 xvbitclr_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvbitclr_h(_1, _2); } + // CHECK-LABEL: @xvbitclr_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvbitclr.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvbitclr.w(<8 x i32> _1, <8 x i32> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8u32 xvbitclr_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvbitclr_w(_1, _2); } + // CHECK-LABEL: @xvbitclr_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvbitclr.d(<4 x 
i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvbitclr.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4u64 xvbitclr_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvbitclr_d(_1, _2); } + // CHECK-LABEL: @xvbitclri_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvbitclri.b(<32 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvbitclri.b(<32 x i8> _1, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32u8 xvbitclri_b(v32u8 _1) { return __builtin_lasx_xvbitclri_b(_1, 1); } + // CHECK-LABEL: @xvbitclri_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvbitclri.h(<16 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvbitclri.h(<16 x i16> _1, i32 1) ++// CHECK-NEXT: store <16 x i16> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16u16 xvbitclri_h(v16u16 _1) { return __builtin_lasx_xvbitclri_h(_1, 1); } + // CHECK-LABEL: @xvbitclri_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvbitclri.w(<8 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvbitclri.w(<8 x i32> _1, i32 1) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8u32 xvbitclri_w(v8u32 _1) { return __builtin_lasx_xvbitclri_w(_1, 1); } + // CHECK-LABEL: @xvbitclri_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvbitclri.d(<4 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvbitclri.d(<4 x i64> _1, i32 1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4u64 xvbitclri_d(v4u64 _1) { return __builtin_lasx_xvbitclri_d(_1, 1); } + // CHECK-LABEL: @xvbitset_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvbitset.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvbitset.b(<32 x i8> _1, <32 x i8> _2) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32u8 xvbitset_b(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvbitset_b(_1, _2); } + // CHECK-LABEL: @xvbitset_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = 
tail call <16 x i16> @llvm.loongarch.lasx.xvbitset.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvbitset.h(<16 x i16> _1, <16 x i16> _2) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16u16 xvbitset_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvbitset_h(_1, _2); } + // CHECK-LABEL: @xvbitset_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvbitset.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvbitset.w(<8 x i32> _1, <8 x i32> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8u32 xvbitset_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvbitset_w(_1, _2); } + // CHECK-LABEL: @xvbitset_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvbitset.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvbitset.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4u64 xvbitset_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvbitset_d(_1, _2); } + // CHECK-LABEL: @xvbitseti_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvbitseti.b(<32 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvbitseti.b(<32 x i8> _1, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32u8 xvbitseti_b(v32u8 _1) { return __builtin_lasx_xvbitseti_b(_1, 1); } + // CHECK-LABEL: @xvbitseti_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvbitseti.h(<16 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvbitseti.h(<16 x i16> _1, i32 1) ++// CHECK-NEXT: store <16 x i16> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16u16 xvbitseti_h(v16u16 _1) { return __builtin_lasx_xvbitseti_h(_1, 1); } + // CHECK-LABEL: @xvbitseti_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvbitseti.w(<8 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvbitseti.w(<8 x i32> _1, i32 1) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret 
void + // + v8u32 xvbitseti_w(v8u32 _1) { return __builtin_lasx_xvbitseti_w(_1, 1); } + // CHECK-LABEL: @xvbitseti_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvbitseti.d(<4 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvbitseti.d(<4 x i64> _1, i32 1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4u64 xvbitseti_d(v4u64 _1) { return __builtin_lasx_xvbitseti_d(_1, 1); } + // CHECK-LABEL: @xvbitrev_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvbitrev.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvbitrev.b(<32 x i8> _1, <32 x i8> _2) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32u8 xvbitrev_b(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvbitrev_b(_1, _2); } + // CHECK-LABEL: @xvbitrev_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvbitrev.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvbitrev.h(<16 x i16> _1, <16 x i16> _2) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16u16 xvbitrev_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvbitrev_h(_1, _2); } + // CHECK-LABEL: @xvbitrev_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvbitrev.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvbitrev.w(<8 x i32> _1, <8 x i32> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8u32 xvbitrev_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvbitrev_w(_1, _2); } + // CHECK-LABEL: @xvbitrev_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvbitrev.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvbitrev.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4u64 xvbitrev_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvbitrev_d(_1, _2); } + // CHECK-LABEL: @xvbitrevi_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b(<32 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: 
_1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvbitrevi.b(<32 x i8> _1, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32u8 xvbitrevi_b(v32u8 _1) { return __builtin_lasx_xvbitrevi_b(_1, 1); } + // CHECK-LABEL: @xvbitrevi_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h(<16 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvbitrevi.h(<16 x i16> _1, i32 1) ++// CHECK-NEXT: store <16 x i16> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16u16 xvbitrevi_h(v16u16 _1) { return __builtin_lasx_xvbitrevi_h(_1, 1); } + // CHECK-LABEL: @xvbitrevi_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w(<8 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvbitrevi.w(<8 x i32> _1, i32 1) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8u32 xvbitrevi_w(v8u32 _1) { return __builtin_lasx_xvbitrevi_w(_1, 1); } + // CHECK-LABEL: @xvbitrevi_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d(<4 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvbitrevi.d(<4 x i64> _1, i32 1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4u64 xvbitrevi_d(v4u64 _1) { return __builtin_lasx_xvbitrevi_d(_1, 1); } + // CHECK-LABEL: @xvadd_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvadd.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvadd.b(<32 x i8> _1, <32 x i8> _2) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvadd_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvadd_b(_1, _2); } + // CHECK-LABEL: @xvadd_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvadd.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvadd.h(<16 x i16> _1, <16 x i16> _2) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvadd_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvadd_h(_1, _2); } + // CHECK-LABEL: @xvadd_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvadd.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// 
CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvadd.w(<8 x i32> _1, <8 x i32> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvadd_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvadd_w(_1, _2); } + // CHECK-LABEL: @xvadd_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvadd.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvadd.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvadd_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvadd_d(_1, _2); } + // CHECK-LABEL: @xvaddi_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvaddi.bu(<32 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvaddi.bu(<32 x i8> _1, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvaddi_bu(v32i8 _1) { return __builtin_lasx_xvaddi_bu(_1, 1); } + // CHECK-LABEL: @xvaddi_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvaddi.hu(<16 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvaddi.hu(<16 x i16> _1, i32 1) ++// CHECK-NEXT: store <16 x i16> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvaddi_hu(v16i16 _1) { return __builtin_lasx_xvaddi_hu(_1, 1); } + // CHECK-LABEL: @xvaddi_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvaddi.wu(<8 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvaddi.wu(<8 x i32> _1, i32 1) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvaddi_wu(v8i32 _1) { return __builtin_lasx_xvaddi_wu(_1, 1); } + // CHECK-LABEL: @xvaddi_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvaddi.du(<4 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvaddi.du(<4 x i64> _1, i32 1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvaddi_du(v4i64 _1) { return __builtin_lasx_xvaddi_du(_1, 1); } + // CHECK-LABEL: @xvsub_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsub.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret 
<32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsub.b(<32 x i8> _1, <32 x i8> _2) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvsub_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsub_b(_1, _2); } + // CHECK-LABEL: @xvsub_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsub.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsub.h(<16 x i16> _1, <16 x i16> _2) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvsub_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsub_h(_1, _2); } + // CHECK-LABEL: @xvsub_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsub.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsub.w(<8 x i32> _1, <8 x i32> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvsub_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsub_w(_1, _2); } + // CHECK-LABEL: @xvsub_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsub.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsub.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvsub_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsub_d(_1, _2); } + // CHECK-LABEL: @xvsubi_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsubi.bu(<32 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsubi.bu(<32 x i8> _1, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvsubi_bu(v32i8 _1) { return __builtin_lasx_xvsubi_bu(_1, 1); } + // CHECK-LABEL: @xvsubi_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsubi.hu(<16 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsubi.hu(<16 x i16> _1, i32 1) ++// CHECK-NEXT: store <16 x i16> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvsubi_hu(v16i16 _1) { return __builtin_lasx_xvsubi_hu(_1, 
1); } + // CHECK-LABEL: @xvsubi_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsubi.wu(<8 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsubi.wu(<8 x i32> _1, i32 1) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvsubi_wu(v8i32 _1) { return __builtin_lasx_xvsubi_wu(_1, 1); } + // CHECK-LABEL: @xvsubi_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsubi.du(<4 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsubi.du(<4 x i64> _1, i32 1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvsubi_du(v4i64 _1) { return __builtin_lasx_xvsubi_du(_1, 1); } + // CHECK-LABEL: @xvmax_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmax.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmax.b(<32 x i8> _1, <32 x i8> _2) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvmax_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmax_b(_1, _2); } + // CHECK-LABEL: @xvmax_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmax.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmax.h(<16 x i16> _1, <16 x i16> _2) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvmax_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmax_h(_1, _2); } + // CHECK-LABEL: @xvmax_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmax.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmax.w(<8 x i32> _1, <8 x i32> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvmax_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmax_w(_1, _2); } + // CHECK-LABEL: @xvmax_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmax.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmax.d(<4 x i64> _1, <4 x i64> _2) 
++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvmax_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmax_d(_1, _2); } + // CHECK-LABEL: @xvmaxi_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmaxi.b(<32 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmaxi.b(<32 x i8> _1, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvmaxi_b(v32i8 _1) { return __builtin_lasx_xvmaxi_b(_1, 1); } + // CHECK-LABEL: @xvmaxi_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmaxi.h(<16 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmaxi.h(<16 x i16> _1, i32 1) ++// CHECK-NEXT: store <16 x i16> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvmaxi_h(v16i16 _1) { return __builtin_lasx_xvmaxi_h(_1, 1); } + // CHECK-LABEL: @xvmaxi_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmaxi.w(<8 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmaxi.w(<8 x i32> _1, i32 1) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvmaxi_w(v8i32 _1) { return __builtin_lasx_xvmaxi_w(_1, 1); } + // CHECK-LABEL: @xvmaxi_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmaxi.d(<4 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmaxi.d(<4 x i64> _1, i32 1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvmaxi_d(v4i64 _1) { return __builtin_lasx_xvmaxi_d(_1, 1); } + // CHECK-LABEL: @xvmax_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmax.bu(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmax.bu(<32 x i8> _1, <32 x i8> _2) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32u8 xvmax_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmax_bu(_1, _2); } + // CHECK-LABEL: @xvmax_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmax.hu(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmax.hu(<16 x i16> _1, <16 x i16> _2) ++// CHECK-NEXT: 
store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16u16 xvmax_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmax_hu(_1, _2); } + // CHECK-LABEL: @xvmax_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmax.wu(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmax.wu(<8 x i32> _1, <8 x i32> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8u32 xvmax_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmax_wu(_1, _2); } + // CHECK-LABEL: @xvmax_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmax.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmax.du(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4u64 xvmax_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmax_du(_1, _2); } + // CHECK-LABEL: @xvmaxi_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu(<32 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmaxi.bu(<32 x i8> _1, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32u8 xvmaxi_bu(v32u8 _1) { return __builtin_lasx_xvmaxi_bu(_1, 1); } + // CHECK-LABEL: @xvmaxi_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu(<16 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmaxi.hu(<16 x i16> _1, i32 1) ++// CHECK-NEXT: store <16 x i16> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16u16 xvmaxi_hu(v16u16 _1) { return __builtin_lasx_xvmaxi_hu(_1, 1); } + // CHECK-LABEL: @xvmaxi_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu(<8 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmaxi.wu(<8 x i32> _1, i32 1) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8u32 xvmaxi_wu(v8u32 _1) { return __builtin_lasx_xvmaxi_wu(_1, 1); } + // CHECK-LABEL: @xvmaxi_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmaxi.du(<4 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmaxi.du(<4 x i64> _1, i32 1) ++// 
CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4u64 xvmaxi_du(v4u64 _1) { return __builtin_lasx_xvmaxi_du(_1, 1); } + // CHECK-LABEL: @xvmin_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmin.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmin.b(<32 x i8> _1, <32 x i8> _2) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvmin_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmin_b(_1, _2); } + // CHECK-LABEL: @xvmin_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmin.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmin.h(<16 x i16> _1, <16 x i16> _2) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvmin_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmin_h(_1, _2); } + // CHECK-LABEL: @xvmin_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmin.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmin.w(<8 x i32> _1, <8 x i32> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvmin_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmin_w(_1, _2); } + // CHECK-LABEL: @xvmin_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmin.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmin.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvmin_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmin_d(_1, _2); } + // CHECK-LABEL: @xvmini_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmini.b(<32 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmini.b(<32 x i8> _1, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvmini_b(v32i8 _1) { return __builtin_lasx_xvmini_b(_1, 1); } + // CHECK-LABEL: @xvmini_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmini.h(<16 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// 
CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmini.h(<16 x i16> _1, i32 1) ++// CHECK-NEXT: store <16 x i16> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvmini_h(v16i16 _1) { return __builtin_lasx_xvmini_h(_1, 1); } + // CHECK-LABEL: @xvmini_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmini.w(<8 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmini.w(<8 x i32> _1, i32 1) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvmini_w(v8i32 _1) { return __builtin_lasx_xvmini_w(_1, 1); } + // CHECK-LABEL: @xvmini_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmini.d(<4 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmini.d(<4 x i64> _1, i32 1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvmini_d(v4i64 _1) { return __builtin_lasx_xvmini_d(_1, 1); } + // CHECK-LABEL: @xvmin_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmin.bu(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmin.bu(<32 x i8> _1, <32 x i8> _2) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32u8 xvmin_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmin_bu(_1, _2); } + // CHECK-LABEL: @xvmin_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmin.hu(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmin.hu(<16 x i16> _1, <16 x i16> _2) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16u16 xvmin_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmin_hu(_1, _2); } + // CHECK-LABEL: @xvmin_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmin.wu(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmin.wu(<8 x i32> _1, <8 x i32> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8u32 xvmin_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmin_wu(_1, _2); } + // CHECK-LABEL: @xvmin_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> 
@llvm.loongarch.lasx.xvmin.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmin.du(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4u64 xvmin_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmin_du(_1, _2); } + // CHECK-LABEL: @xvmini_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmini.bu(<32 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmini.bu(<32 x i8> _1, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32u8 xvmini_bu(v32u8 _1) { return __builtin_lasx_xvmini_bu(_1, 1); } + // CHECK-LABEL: @xvmini_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmini.hu(<16 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmini.hu(<16 x i16> _1, i32 1) ++// CHECK-NEXT: store <16 x i16> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16u16 xvmini_hu(v16u16 _1) { return __builtin_lasx_xvmini_hu(_1, 1); } + // CHECK-LABEL: @xvmini_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmini.wu(<8 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmini.wu(<8 x i32> _1, i32 1) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8u32 xvmini_wu(v8u32 _1) { return __builtin_lasx_xvmini_wu(_1, 1); } + // CHECK-LABEL: @xvmini_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmini.du(<4 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmini.du(<4 x i64> _1, i32 1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4u64 xvmini_du(v4u64 _1) { return __builtin_lasx_xvmini_du(_1, 1); } + // CHECK-LABEL: @xvseq_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvseq.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvseq.b(<32 x i8> _1, <32 x i8> _2) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvseq_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvseq_b(_1, _2); } + // CHECK-LABEL: @xvseq_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> 
@llvm.loongarch.lasx.xvseq.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvseq.h(<16 x i16> _1, <16 x i16> _2) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvseq_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvseq_h(_1, _2); } + // CHECK-LABEL: @xvseq_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvseq.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvseq.w(<8 x i32> _1, <8 x i32> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvseq_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvseq_w(_1, _2); } + // CHECK-LABEL: @xvseq_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvseq.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvseq.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvseq_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvseq_d(_1, _2); } + // CHECK-LABEL: @xvseqi_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvseqi.b(<32 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvseqi.b(<32 x i8> _1, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvseqi_b(v32i8 _1) { return __builtin_lasx_xvseqi_b(_1, 1); } + // CHECK-LABEL: @xvseqi_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvseqi.h(<16 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvseqi.h(<16 x i16> _1, i32 1) ++// CHECK-NEXT: store <16 x i16> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvseqi_h(v16i16 _1) { return __builtin_lasx_xvseqi_h(_1, 1); } + // CHECK-LABEL: @xvseqi_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvseqi.w(<8 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvseqi.w(<8 x i32> _1, i32 1) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvseqi_w(v8i32 _1) { return __builtin_lasx_xvseqi_w(_1, 1); } + // CHECK-LABEL: 
@xvseqi_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvseqi.d(<4 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvseqi.d(<4 x i64> _1, i32 1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvseqi_d(v4i64 _1) { return __builtin_lasx_xvseqi_d(_1, 1); } + // CHECK-LABEL: @xvslt_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvslt.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvslt.b(<32 x i8> _1, <32 x i8> _2) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvslt_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvslt_b(_1, _2); } + // CHECK-LABEL: @xvslt_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvslt.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvslt.h(<16 x i16> _1, <16 x i16> _2) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvslt_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvslt_h(_1, _2); } + // CHECK-LABEL: @xvslt_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvslt.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvslt.w(<8 x i32> _1, <8 x i32> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvslt_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvslt_w(_1, _2); } + // CHECK-LABEL: @xvslt_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvslt.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvslt.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvslt_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvslt_d(_1, _2); } + // CHECK-LABEL: @xvslti_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvslti.b(<32 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvslti.b(<32 x i8> _1, i32 1) ++// CHECK-NEXT: store <32 
x i8> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvslti_b(v32i8 _1) { return __builtin_lasx_xvslti_b(_1, 1); } + // CHECK-LABEL: @xvslti_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvslti.h(<16 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvslti.h(<16 x i16> _1, i32 1) ++// CHECK-NEXT: store <16 x i16> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvslti_h(v16i16 _1) { return __builtin_lasx_xvslti_h(_1, 1); } + // CHECK-LABEL: @xvslti_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvslti.w(<8 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvslti.w(<8 x i32> _1, i32 1) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvslti_w(v8i32 _1) { return __builtin_lasx_xvslti_w(_1, 1); } + // CHECK-LABEL: @xvslti_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvslti.d(<4 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvslti.d(<4 x i64> _1, i32 1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvslti_d(v4i64 _1) { return __builtin_lasx_xvslti_d(_1, 1); } + // CHECK-LABEL: @xvslt_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvslt.bu(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvslt.bu(<32 x i8> _1, <32 x i8> _2) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvslt_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvslt_bu(_1, _2); } + // CHECK-LABEL: @xvslt_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvslt.hu(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvslt.hu(<16 x i16> _1, <16 x i16> _2) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvslt_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvslt_hu(_1, _2); } + // CHECK-LABEL: @xvslt_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvslt.wu(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> 
@llvm.loongarch.lasx.xvslt.wu(<8 x i32> _1, <8 x i32> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvslt_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvslt_wu(_1, _2); } + // CHECK-LABEL: @xvslt_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvslt.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvslt.du(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvslt_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvslt_du(_1, _2); } + // CHECK-LABEL: @xvslti_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvslti.bu(<32 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvslti.bu(<32 x i8> _1, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvslti_bu(v32u8 _1) { return __builtin_lasx_xvslti_bu(_1, 1); } + // CHECK-LABEL: @xvslti_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvslti.hu(<16 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvslti.hu(<16 x i16> _1, i32 1) ++// CHECK-NEXT: store <16 x i16> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvslti_hu(v16u16 _1) { return __builtin_lasx_xvslti_hu(_1, 1); } + // CHECK-LABEL: @xvslti_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvslti.wu(<8 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvslti.wu(<8 x i32> _1, i32 1) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvslti_wu(v8u32 _1) { return __builtin_lasx_xvslti_wu(_1, 1); } + // CHECK-LABEL: @xvslti_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvslti.du(<4 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvslti.du(<4 x i64> _1, i32 1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvslti_du(v4u64 _1) { return __builtin_lasx_xvslti_du(_1, 1); } + // CHECK-LABEL: @xvsle_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsle.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> 
@llvm.loongarch.lasx.xvsle.b(<32 x i8> _1, <32 x i8> _2) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvsle_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsle_b(_1, _2); } + // CHECK-LABEL: @xvsle_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsle.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsle.h(<16 x i16> _1, <16 x i16> _2) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvsle_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsle_h(_1, _2); } + // CHECK-LABEL: @xvsle_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsle.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsle.w(<8 x i32> _1, <8 x i32> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvsle_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsle_w(_1, _2); } + // CHECK-LABEL: @xvsle_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsle.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsle.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvsle_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsle_d(_1, _2); } + // CHECK-LABEL: @xvslei_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvslei.b(<32 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvslei.b(<32 x i8> _1, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvslei_b(v32i8 _1) { return __builtin_lasx_xvslei_b(_1, 1); } + // CHECK-LABEL: @xvslei_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvslei.h(<16 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvslei.h(<16 x i16> _1, i32 1) ++// CHECK-NEXT: store <16 x i16> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvslei_h(v16i16 _1) { return __builtin_lasx_xvslei_h(_1, 1); } + // CHECK-LABEL: @xvslei_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvslei.w(<8 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load 
<8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvslei.w(<8 x i32> _1, i32 1) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvslei_w(v8i32 _1) { return __builtin_lasx_xvslei_w(_1, 1); } + // CHECK-LABEL: @xvslei_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvslei.d(<4 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvslei.d(<4 x i64> _1, i32 1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvslei_d(v4i64 _1) { return __builtin_lasx_xvslei_d(_1, 1); } + // CHECK-LABEL: @xvsle_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsle.bu(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsle.bu(<32 x i8> _1, <32 x i8> _2) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvsle_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvsle_bu(_1, _2); } + // CHECK-LABEL: @xvsle_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsle.hu(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsle.hu(<16 x i16> _1, <16 x i16> _2) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvsle_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvsle_hu(_1, _2); } + // CHECK-LABEL: @xvsle_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsle.wu(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsle.wu(<8 x i32> _1, <8 x i32> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvsle_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvsle_wu(_1, _2); } + // CHECK-LABEL: @xvsle_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsle.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsle.du(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvsle_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvsle_du(_1, _2); } + // CHECK-LABEL: @xvslei_bu( 
+ // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvslei.bu(<32 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvslei.bu(<32 x i8> _1, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvslei_bu(v32u8 _1) { return __builtin_lasx_xvslei_bu(_1, 1); } + // CHECK-LABEL: @xvslei_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvslei.hu(<16 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvslei.hu(<16 x i16> _1, i32 1) ++// CHECK-NEXT: store <16 x i16> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvslei_hu(v16u16 _1) { return __builtin_lasx_xvslei_hu(_1, 1); } + // CHECK-LABEL: @xvslei_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvslei.wu(<8 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvslei.wu(<8 x i32> _1, i32 1) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvslei_wu(v8u32 _1) { return __builtin_lasx_xvslei_wu(_1, 1); } + // CHECK-LABEL: @xvslei_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvslei.du(<4 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvslei.du(<4 x i64> _1, i32 1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvslei_du(v4u64 _1) { return __builtin_lasx_xvslei_du(_1, 1); } + // CHECK-LABEL: @xvsat_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsat.b(<32 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsat.b(<32 x i8> _1, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvsat_b(v32i8 _1) { return __builtin_lasx_xvsat_b(_1, 1); } + // CHECK-LABEL: @xvsat_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsat.h(<16 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsat.h(<16 x i16> _1, i32 1) ++// CHECK-NEXT: store <16 x i16> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvsat_h(v16i16 _1) { return __builtin_lasx_xvsat_h(_1, 1); } + // CHECK-LABEL: @xvsat_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsat.w(<8 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, 
align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsat.w(<8 x i32> _1, i32 1) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvsat_w(v8i32 _1) { return __builtin_lasx_xvsat_w(_1, 1); } + // CHECK-LABEL: @xvsat_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsat.d(<4 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsat.d(<4 x i64> _1, i32 1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvsat_d(v4i64 _1) { return __builtin_lasx_xvsat_d(_1, 1); } + // CHECK-LABEL: @xvsat_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsat.bu(<32 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsat.bu(<32 x i8> _1, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32u8 xvsat_bu(v32u8 _1) { return __builtin_lasx_xvsat_bu(_1, 1); } + // CHECK-LABEL: @xvsat_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsat.hu(<16 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsat.hu(<16 x i16> _1, i32 1) ++// CHECK-NEXT: store <16 x i16> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16u16 xvsat_hu(v16u16 _1) { return __builtin_lasx_xvsat_hu(_1, 1); } + // CHECK-LABEL: @xvsat_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsat.wu(<8 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsat.wu(<8 x i32> _1, i32 1) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8u32 xvsat_wu(v8u32 _1) { return __builtin_lasx_xvsat_wu(_1, 1); } + // CHECK-LABEL: @xvsat_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsat.du(<4 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsat.du(<4 x i64> _1, i32 1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4u64 xvsat_du(v4u64 _1) { return __builtin_lasx_xvsat_du(_1, 1); } + // CHECK-LABEL: @xvadda_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvadda.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvadda.b(<32 x i8> _1, <32 x i8> _2) ++// CHECK-NEXT: store <32 x 
i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvadda_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvadda_b(_1, _2); } + // CHECK-LABEL: @xvadda_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvadda.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvadda.h(<16 x i16> _1, <16 x i16> _2) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvadda_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvadda_h(_1, _2); } + // CHECK-LABEL: @xvadda_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvadda.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvadda.w(<8 x i32> _1, <8 x i32> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvadda_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvadda_w(_1, _2); } + // CHECK-LABEL: @xvadda_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvadda.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvadda.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvadda_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvadda_d(_1, _2); } + // CHECK-LABEL: @xvsadd_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsadd.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsadd.b(<32 x i8> _1, <32 x i8> _2) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvsadd_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsadd_b(_1, _2); } + // CHECK-LABEL: @xvsadd_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsadd.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsadd.h(<16 x i16> _1, <16 x i16> _2) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvsadd_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsadd_h(_1, _2); } + // CHECK-LABEL: @xvsadd_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: 
TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsadd.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsadd.w(<8 x i32> _1, <8 x i32> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvsadd_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsadd_w(_1, _2); } + // CHECK-LABEL: @xvsadd_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsadd.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsadd.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvsadd_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsadd_d(_1, _2); } + // CHECK-LABEL: @xvsadd_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsadd.bu(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsadd.bu(<32 x i8> _1, <32 x i8> _2) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32u8 xvsadd_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvsadd_bu(_1, _2); } + // CHECK-LABEL: @xvsadd_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsadd.hu(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsadd.hu(<16 x i16> _1, <16 x i16> _2) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16u16 xvsadd_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvsadd_hu(_1, _2); } + // CHECK-LABEL: @xvsadd_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsadd.wu(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsadd.wu(<8 x i32> _1, <8 x i32> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8u32 xvsadd_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvsadd_wu(_1, _2); } + // CHECK-LABEL: @xvsadd_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsadd.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* 
= load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsadd.du(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4u64 xvsadd_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvsadd_du(_1, _2); } + // CHECK-LABEL: @xvavg_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvavg.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvavg.b(<32 x i8> _1, <32 x i8> _2) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvavg_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvavg_b(_1, _2); } + // CHECK-LABEL: @xvavg_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvavg.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvavg.h(<16 x i16> _1, <16 x i16> _2) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvavg_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvavg_h(_1, _2); } + // CHECK-LABEL: @xvavg_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvavg.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvavg.w(<8 x i32> _1, <8 x i32> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvavg_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvavg_w(_1, _2); } + // CHECK-LABEL: @xvavg_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvavg.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvavg.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvavg_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvavg_d(_1, _2); } + // CHECK-LABEL: @xvavg_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvavg.bu(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvavg.bu(<32 x i8> _1, <32 x i8> _2) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + 
// + v32u8 xvavg_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvavg_bu(_1, _2); } + // CHECK-LABEL: @xvavg_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvavg.hu(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvavg.hu(<16 x i16> _1, <16 x i16> _2) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16u16 xvavg_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvavg_hu(_1, _2); } + // CHECK-LABEL: @xvavg_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvavg.wu(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvavg.wu(<8 x i32> _1, <8 x i32> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8u32 xvavg_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvavg_wu(_1, _2); } + // CHECK-LABEL: @xvavg_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvavg.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvavg.du(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4u64 xvavg_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvavg_du(_1, _2); } + // CHECK-LABEL: @xvavgr_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvavgr.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvavgr.b(<32 x i8> _1, <32 x i8> _2) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvavgr_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvavgr_b(_1, _2); } + // CHECK-LABEL: @xvavgr_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvavgr.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvavgr.h(<16 x i16> _1, <16 x i16> _2) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvavgr_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvavgr_h(_1, _2); } + // CHECK-LABEL: @xvavgr_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvavgr.w(<8 x i32> _1:%.*, <8 x 
i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvavgr.w(<8 x i32> _1, <8 x i32> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvavgr_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvavgr_w(_1, _2); } + // CHECK-LABEL: @xvavgr_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvavgr.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvavgr.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvavgr_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvavgr_d(_1, _2); } + // CHECK-LABEL: @xvavgr_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvavgr.bu(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvavgr.bu(<32 x i8> _1, <32 x i8> _2) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32u8 xvavgr_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvavgr_bu(_1, _2); } + // CHECK-LABEL: @xvavgr_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvavgr.hu(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvavgr.hu(<16 x i16> _1, <16 x i16> _2) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16u16 xvavgr_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvavgr_hu(_1, _2); } + // CHECK-LABEL: @xvavgr_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvavgr.wu(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvavgr.wu(<8 x i32> _1, <8 x i32> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8u32 xvavgr_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvavgr_wu(_1, _2); } + // CHECK-LABEL: @xvavgr_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvavgr.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = 
tail call <4 x i64> @llvm.loongarch.lasx.xvavgr.du(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4u64 xvavgr_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvavgr_du(_1, _2); } + // CHECK-LABEL: @xvssub_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssub.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssub.b(<32 x i8> _1, <32 x i8> _2) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvssub_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvssub_b(_1, _2); } + // CHECK-LABEL: @xvssub_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssub.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssub.h(<16 x i16> _1, <16 x i16> _2) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvssub_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssub_h(_1, _2); } + // CHECK-LABEL: @xvssub_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssub.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssub.w(<8 x i32> _1, <8 x i32> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvssub_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssub_w(_1, _2); } + // CHECK-LABEL: @xvssub_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvssub.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvssub.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvssub_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssub_d(_1, _2); } + // CHECK-LABEL: @xvssub_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssub.bu(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssub.bu(<32 x i8> _1, <32 x i8> _2) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32u8 xvssub_bu(v32u8 _1, v32u8 _2) { return 
__builtin_lasx_xvssub_bu(_1, _2); } + // CHECK-LABEL: @xvssub_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssub.hu(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssub.hu(<16 x i16> _1, <16 x i16> _2) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16u16 xvssub_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvssub_hu(_1, _2); } + // CHECK-LABEL: @xvssub_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssub.wu(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssub.wu(<8 x i32> _1, <8 x i32> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8u32 xvssub_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvssub_wu(_1, _2); } + // CHECK-LABEL: @xvssub_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvssub.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvssub.du(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4u64 xvssub_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvssub_du(_1, _2); } + // CHECK-LABEL: @xvabsd_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvabsd.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvabsd.b(<32 x i8> _1, <32 x i8> _2) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvabsd_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvabsd_b(_1, _2); } + // CHECK-LABEL: @xvabsd_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvabsd.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvabsd.h(<16 x i16> _1, <16 x i16> _2) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvabsd_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvabsd_h(_1, _2); } + // CHECK-LABEL: @xvabsd_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvabsd.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret 
<8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvabsd.w(<8 x i32> _1, <8 x i32> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvabsd_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvabsd_w(_1, _2); } + // CHECK-LABEL: @xvabsd_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvabsd.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvabsd.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvabsd_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvabsd_d(_1, _2); } + // CHECK-LABEL: @xvabsd_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvabsd.bu(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvabsd.bu(<32 x i8> _1, <32 x i8> _2) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32u8 xvabsd_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvabsd_bu(_1, _2); } + // CHECK-LABEL: @xvabsd_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvabsd.hu(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvabsd.hu(<16 x i16> _1, <16 x i16> _2) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16u16 xvabsd_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvabsd_hu(_1, _2); } + // CHECK-LABEL: @xvabsd_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvabsd.wu(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvabsd.wu(<8 x i32> _1, <8 x i32> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8u32 xvabsd_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvabsd_wu(_1, _2); } + // CHECK-LABEL: @xvabsd_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvabsd.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> 
@llvm.loongarch.lasx.xvabsd.du(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4u64 xvabsd_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvabsd_du(_1, _2); } + // CHECK-LABEL: @xvmul_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmul.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmul.b(<32 x i8> _1, <32 x i8> _2) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvmul_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmul_b(_1, _2); } + // CHECK-LABEL: @xvmul_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmul.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmul.h(<16 x i16> _1, <16 x i16> _2) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvmul_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmul_h(_1, _2); } + // CHECK-LABEL: @xvmul_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmul.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmul.w(<8 x i32> _1, <8 x i32> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvmul_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmul_w(_1, _2); } + // CHECK-LABEL: @xvmul_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmul.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmul.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvmul_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmul_d(_1, _2); } + // CHECK-LABEL: @xvmadd_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmadd.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*, <32 x i8> _3:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _3:%.* = load <32 x i8>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmadd.b(<32 x i8> _1, <32 x i8> _2, <32 x i8> _3) ++// CHECK-NEXT: store <32 x i8> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: 
ret void + // + v32i8 xvmadd_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvmadd_b(_1, _2, _3); } + // CHECK-LABEL: @xvmadd_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmadd.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*, <16 x i16> _3:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _3:%.* = load <16 x i16>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmadd.h(<16 x i16> _1, <16 x i16> _2, <16 x i16> _3) ++// CHECK-NEXT: store <16 x i16> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvmadd_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvmadd_h(_1, _2, _3); } + // CHECK-LABEL: @xvmadd_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmadd.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*, <8 x i32> _3:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _3:%.* = load <8 x i32>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmadd.w(<8 x i32> _1, <8 x i32> _2, <8 x i32> _3) ++// CHECK-NEXT: store <8 x i32> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvmadd_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __builtin_lasx_xvmadd_w(_1, _2, _3); } + // CHECK-LABEL: @xvmadd_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmadd.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*, <4 x i64> _3:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _3:%.* = load <4 x i64>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmadd.d(<4 x i64> _1, <4 x i64> _2, <4 x i64> _3) ++// CHECK-NEXT: store <4 x i64> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvmadd_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __builtin_lasx_xvmadd_d(_1, _2, _3); } + // CHECK-LABEL: @xvmsub_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmsub.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*, <32 x i8> _3:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _3:%.* = load <32 x i8>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmsub.b(<32 x i8> _1, <32 x i8> _2, <32 x i8> _3) ++// CHECK-NEXT: store <32 x i8> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvmsub_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvmsub_b(_1, _2, _3); } + // CHECK-LABEL: @xvmsub_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmsub.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*, <16 x i16> _3:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa 
TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _3:%.* = load <16 x i16>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmsub.h(<16 x i16> _1, <16 x i16> _2, <16 x i16> _3) ++// CHECK-NEXT: store <16 x i16> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvmsub_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvmsub_h(_1, _2, _3); } + // CHECK-LABEL: @xvmsub_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmsub.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*, <8 x i32> _3:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _3:%.* = load <8 x i32>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmsub.w(<8 x i32> _1, <8 x i32> _2, <8 x i32> _3) ++// CHECK-NEXT: store <8 x i32> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvmsub_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __builtin_lasx_xvmsub_w(_1, _2, _3); } + // CHECK-LABEL: @xvmsub_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmsub.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*, <4 x i64> _3:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _3:%.* = load <4 x i64>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmsub.d(<4 x i64> _1, <4 x i64> _2, <4 x i64> _3) ++// CHECK-NEXT: store <4 x i64> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvmsub_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __builtin_lasx_xvmsub_d(_1, _2, _3); } + // CHECK-LABEL: @xvdiv_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvdiv.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvdiv.b(<32 x i8> _1, <32 x i8> _2) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvdiv_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvdiv_b(_1, _2); } + // CHECK-LABEL: @xvdiv_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvdiv.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvdiv.h(<16 x i16> _1, <16 x i16> _2) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvdiv_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvdiv_h(_1, _2); } + // CHECK-LABEL: @xvdiv_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvdiv.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// 
CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvdiv.w(<8 x i32> _1, <8 x i32> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvdiv_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvdiv_w(_1, _2); } + // CHECK-LABEL: @xvdiv_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvdiv.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvdiv.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvdiv_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvdiv_d(_1, _2); } + // CHECK-LABEL: @xvdiv_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvdiv.bu(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvdiv.bu(<32 x i8> _1, <32 x i8> _2) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32u8 xvdiv_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvdiv_bu(_1, _2); } + // CHECK-LABEL: @xvdiv_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvdiv.hu(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvdiv.hu(<16 x i16> _1, <16 x i16> _2) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16u16 xvdiv_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvdiv_hu(_1, _2); } + // CHECK-LABEL: @xvdiv_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvdiv.wu(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvdiv.wu(<8 x i32> _1, <8 x i32> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8u32 xvdiv_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvdiv_wu(_1, _2); } + // CHECK-LABEL: @xvdiv_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvdiv.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> 
@llvm.loongarch.lasx.xvdiv.du(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4u64 xvdiv_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvdiv_du(_1, _2); } + // CHECK-LABEL: @xvhaddw_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvhaddw.h.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvhaddw.h.b(<32 x i8> _1, <32 x i8> _2) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvhaddw_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvhaddw_h_b(_1, _2); } + // CHECK-LABEL: @xvhaddw_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvhaddw.w.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvhaddw.w.h(<16 x i16> _1, <16 x i16> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvhaddw_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvhaddw_w_h(_1, _2); } + // CHECK-LABEL: @xvhaddw_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.d.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.d.w(<8 x i32> _1, <8 x i32> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvhaddw_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvhaddw_d_w(_1, _2); } + // CHECK-LABEL: @xvhaddw_hu_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvhaddw.hu.bu(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvhaddw.hu.bu(<32 x i8> _1, <32 x i8> _2) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16u16 xvhaddw_hu_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvhaddw_hu_bu(_1, _2); } + // CHECK-LABEL: @xvhaddw_wu_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvhaddw.wu.hu(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvhaddw.wu.hu(<16 x i16> _1, <16 x i16> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: 
ret void + // + v8u32 xvhaddw_wu_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvhaddw_wu_hu(_1, _2); } + // CHECK-LABEL: @xvhaddw_du_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.du.wu(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.du.wu(<8 x i32> _1, <8 x i32> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4u64 xvhaddw_du_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvhaddw_du_wu(_1, _2); } + // CHECK-LABEL: @xvhsubw_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvhsubw.h.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvhsubw.h.b(<32 x i8> _1, <32 x i8> _2) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvhsubw_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvhsubw_h_b(_1, _2); } + // CHECK-LABEL: @xvhsubw_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvhsubw.w.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvhsubw.w.h(<16 x i16> _1, <16 x i16> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvhsubw_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvhsubw_w_h(_1, _2); } + // CHECK-LABEL: @xvhsubw_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.d.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.d.w(<8 x i32> _1, <8 x i32> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvhsubw_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvhsubw_d_w(_1, _2); } + // CHECK-LABEL: @xvhsubw_hu_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvhsubw.hu.bu(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvhsubw.hu.bu(<32 x i8> _1, <32 x i8> _2) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvhsubw_hu_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvhsubw_hu_bu(_1, _2); } + // CHECK-LABEL: @xvhsubw_wu_hu( + // 
CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvhsubw.wu.hu(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvhsubw.wu.hu(<16 x i16> _1, <16 x i16> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvhsubw_wu_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvhsubw_wu_hu(_1, _2); } + // CHECK-LABEL: @xvhsubw_du_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.du.wu(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.du.wu(<8 x i32> _1, <8 x i32> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvhsubw_du_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvhsubw_du_wu(_1, _2); } + // CHECK-LABEL: @xvmod_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmod.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmod.b(<32 x i8> _1, <32 x i8> _2) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvmod_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmod_b(_1, _2); } + // CHECK-LABEL: @xvmod_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmod.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmod.h(<16 x i16> _1, <16 x i16> _2) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvmod_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmod_h(_1, _2); } + // CHECK-LABEL: @xvmod_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmod.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmod.w(<8 x i32> _1, <8 x i32> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvmod_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmod_w(_1, _2); } + // CHECK-LABEL: @xvmod_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmod.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr 
TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmod.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvmod_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmod_d(_1, _2); } + // CHECK-LABEL: @xvmod_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmod.bu(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmod.bu(<32 x i8> _1, <32 x i8> _2) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32u8 xvmod_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmod_bu(_1, _2); } + // CHECK-LABEL: @xvmod_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmod.hu(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmod.hu(<16 x i16> _1, <16 x i16> _2) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16u16 xvmod_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmod_hu(_1, _2); } + // CHECK-LABEL: @xvmod_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmod.wu(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmod.wu(<8 x i32> _1, <8 x i32> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8u32 xvmod_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmod_wu(_1, _2); } + // CHECK-LABEL: @xvmod_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmod.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmod.du(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4u64 xvmod_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmod_du(_1, _2); } + // CHECK-LABEL: @xvrepl128vei_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b(<32 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvrepl128vei.b(<32 x i8> _1, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + 
v32i8 xvrepl128vei_b(v32i8 _1) { return __builtin_lasx_xvrepl128vei_b(_1, 1); } + // CHECK-LABEL: @xvrepl128vei_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h(<16 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvrepl128vei.h(<16 x i16> _1, i32 1) ++// CHECK-NEXT: store <16 x i16> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvrepl128vei_h(v16i16 _1) { return __builtin_lasx_xvrepl128vei_h(_1, 1); } + // CHECK-LABEL: @xvrepl128vei_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w(<8 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvrepl128vei.w(<8 x i32> _1, i32 1) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvrepl128vei_w(v8i32 _1) { return __builtin_lasx_xvrepl128vei_w(_1, 1); } + // CHECK-LABEL: @xvrepl128vei_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d(<4 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvrepl128vei.d(<4 x i64> _1, i32 1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvrepl128vei_d(v4i64 _1) { return __builtin_lasx_xvrepl128vei_d(_1, 1); } + // CHECK-LABEL: @xvpickev_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvpickev.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvpickev.b(<32 x i8> _1, <32 x i8> _2) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvpickev_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvpickev_b(_1, _2); } + // CHECK-LABEL: @xvpickev_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvpickev.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvpickev.h(<16 x i16> _1, <16 x i16> _2) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvpickev_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvpickev_h(_1, _2); } + // CHECK-LABEL: @xvpickev_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvpickev.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// 
CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvpickev.w(<8 x i32> _1, <8 x i32> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvpickev_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvpickev_w(_1, _2); } + // CHECK-LABEL: @xvpickev_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvpickev.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvpickev.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvpickev_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvpickev_d(_1, _2); } + // CHECK-LABEL: @xvpickod_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvpickod.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvpickod.b(<32 x i8> _1, <32 x i8> _2) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvpickod_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvpickod_b(_1, _2); } + // CHECK-LABEL: @xvpickod_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvpickod.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvpickod.h(<16 x i16> _1, <16 x i16> _2) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvpickod_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvpickod_h(_1, _2); } + // CHECK-LABEL: @xvpickod_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvpickod.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvpickod.w(<8 x i32> _1, <8 x i32> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvpickod_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvpickod_w(_1, _2); } + // CHECK-LABEL: @xvpickod_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvpickod.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvpickod.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: 
ret void + // + v4i64 xvpickod_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvpickod_d(_1, _2); } + // CHECK-LABEL: @xvilvh_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvilvh.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvilvh.b(<32 x i8> _1, <32 x i8> _2) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvilvh_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvilvh_b(_1, _2); } + // CHECK-LABEL: @xvilvh_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvilvh.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvilvh.h(<16 x i16> _1, <16 x i16> _2) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvilvh_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvilvh_h(_1, _2); } + // CHECK-LABEL: @xvilvh_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvilvh.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvilvh.w(<8 x i32> _1, <8 x i32> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvilvh_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvilvh_w(_1, _2); } + // CHECK-LABEL: @xvilvh_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvilvh.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvilvh.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvilvh_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvilvh_d(_1, _2); } + // CHECK-LABEL: @xvilvl_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvilvl.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvilvl.b(<32 x i8> _1, <32 x i8> _2) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvilvl_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvilvl_b(_1, _2); } + // CHECK-LABEL: @xvilvl_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvilvl.h(<16 x i16> _1:%.*, 
<16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvilvl.h(<16 x i16> _1, <16 x i16> _2) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvilvl_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvilvl_h(_1, _2); } + // CHECK-LABEL: @xvilvl_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvilvl.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvilvl.w(<8 x i32> _1, <8 x i32> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvilvl_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvilvl_w(_1, _2); } + // CHECK-LABEL: @xvilvl_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvilvl.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvilvl.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvilvl_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvilvl_d(_1, _2); } + // CHECK-LABEL: @xvpackev_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvpackev.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvpackev.b(<32 x i8> _1, <32 x i8> _2) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvpackev_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvpackev_b(_1, _2); } + // CHECK-LABEL: @xvpackev_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvpackev.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvpackev.h(<16 x i16> _1, <16 x i16> _2) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvpackev_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvpackev_h(_1, _2); } + // CHECK-LABEL: @xvpackev_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvpackev.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// 
CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvpackev.w(<8 x i32> _1, <8 x i32> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvpackev_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvpackev_w(_1, _2); } + // CHECK-LABEL: @xvpackev_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvpackev.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvpackev.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvpackev_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvpackev_d(_1, _2); } + // CHECK-LABEL: @xvpackod_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvpackod.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvpackod.b(<32 x i8> _1, <32 x i8> _2) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvpackod_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvpackod_b(_1, _2); } + // CHECK-LABEL: @xvpackod_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvpackod.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvpackod.h(<16 x i16> _1, <16 x i16> _2) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvpackod_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvpackod_h(_1, _2); } + // CHECK-LABEL: @xvpackod_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvpackod.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvpackod.w(<8 x i32> _1, <8 x i32> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvpackod_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvpackod_w(_1, _2); } + // CHECK-LABEL: @xvpackod_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvpackod.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvpackod.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: 
ret void + // + v4i64 xvpackod_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvpackod_d(_1, _2); } + // CHECK-LABEL: @xvshuf_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvshuf.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*, <32 x i8> _3:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _3:%.* = load <32 x i8>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvshuf.b(<32 x i8> _1, <32 x i8> _2, <32 x i8> _3) ++// CHECK-NEXT: store <32 x i8> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvshuf_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvshuf_b(_1, _2, _3); } + // CHECK-LABEL: @xvshuf_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvshuf.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*, <16 x i16> _3:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _3:%.* = load <16 x i16>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvshuf.h(<16 x i16> _1, <16 x i16> _2, <16 x i16> _3) ++// CHECK-NEXT: store <16 x i16> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvshuf_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvshuf_h(_1, _2, _3); } + // CHECK-LABEL: @xvshuf_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvshuf.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*, <8 x i32> _3:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _3:%.* = load <8 x i32>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvshuf.w(<8 x i32> _1, <8 x i32> _2, <8 x i32> _3) ++// CHECK-NEXT: store <8 x i32> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvshuf_w(v8i32 _1, v8i32 _2, v8i32 _3) { return __builtin_lasx_xvshuf_w(_1, _2, _3); } + // CHECK-LABEL: @xvshuf_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvshuf.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*, <4 x i64> _3:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _3:%.* = load <4 x i64>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvshuf.d(<4 x i64> _1, <4 x i64> _2, <4 x i64> _3) ++// CHECK-NEXT: store <4 x i64> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvshuf_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __builtin_lasx_xvshuf_d(_1, _2, _3); } + // CHECK-LABEL: @xvand_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvand.v(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load 
<32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvand.v(<32 x i8> _1, <32 x i8> _2) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32u8 xvand_v(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvand_v(_1, _2); } + // CHECK-LABEL: @xvandi_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvandi.b(<32 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvandi.b(<32 x i8> _1, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32u8 xvandi_b(v32u8 _1) { return __builtin_lasx_xvandi_b(_1, 1); } + // CHECK-LABEL: @xvor_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvor.v(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvor.v(<32 x i8> _1, <32 x i8> _2) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32u8 xvor_v(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvor_v(_1, _2); } + // CHECK-LABEL: @xvori_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvori.b(<32 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvori.b(<32 x i8> _1, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32u8 xvori_b(v32u8 _1) { return __builtin_lasx_xvori_b(_1, 1); } + // CHECK-LABEL: @xvnor_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvnor.v(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvnor.v(<32 x i8> _1, <32 x i8> _2) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32u8 xvnor_v(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvnor_v(_1, _2); } + // CHECK-LABEL: @xvnori_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvnori.b(<32 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvnori.b(<32 x i8> _1, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32u8 xvnori_b(v32u8 _1) { return __builtin_lasx_xvnori_b(_1, 1); } + // CHECK-LABEL: @xvxor_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvxor.v(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 
32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvxor.v(<32 x i8> _1, <32 x i8> _2) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32u8 xvxor_v(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvxor_v(_1, _2); } + // CHECK-LABEL: @xvxori_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvxori.b(<32 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvxori.b(<32 x i8> _1, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32u8 xvxori_b(v32u8 _1) { return __builtin_lasx_xvxori_b(_1, 1); } + // CHECK-LABEL: @xvbitsel_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvbitsel.v(<32 x i8> _1:%.*, <32 x i8> _2:%.*, <32 x i8> _3:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _3:%.* = load <32 x i8>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvbitsel.v(<32 x i8> _1, <32 x i8> _2, <32 x i8> _3) ++// CHECK-NEXT: store <32 x i8> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32u8 xvbitsel_v(v32u8 _1, v32u8 _2, v32u8 _3) { return __builtin_lasx_xvbitsel_v(_1, _2, _3); } + // CHECK-LABEL: @xvbitseli_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvbitseli.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvbitseli.b(<32 x i8> _1, <32 x i8> _2, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32u8 xvbitseli_b(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvbitseli_b(_1, _2, 1); } + // CHECK-LABEL: @xvshuf4i_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b(<32 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvshuf4i.b(<32 x i8> _1, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvshuf4i_b(v32i8 _1) { return __builtin_lasx_xvshuf4i_b(_1, 1); } + // CHECK-LABEL: @xvshuf4i_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h(<16 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvshuf4i.h(<16 x i16> _1, i32 1) ++// CHECK-NEXT: store <16 x i16> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvshuf4i_h(v16i16 _1) { return __builtin_lasx_xvshuf4i_h(_1, 1); } 
+ // CHECK-LABEL: @xvshuf4i_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w(<8 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvshuf4i.w(<8 x i32> _1, i32 1) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvshuf4i_w(v8i32 _1) { return __builtin_lasx_xvshuf4i_w(_1, 1); } + // CHECK-LABEL: @xvreplgr2vr_b( + // CHECK-NEXT: entry: + // CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvreplgr2vr.b(i32 _1:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: store <32 x i8> TMP0, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvreplgr2vr_b(int _1) { return __builtin_lasx_xvreplgr2vr_b(_1); } + // CHECK-LABEL: @xvreplgr2vr_h( + // CHECK-NEXT: entry: + // CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvreplgr2vr.h(i32 _1:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: store <16 x i16> TMP0, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvreplgr2vr_h(int _1) { return __builtin_lasx_xvreplgr2vr_h(_1); } + // CHECK-LABEL: @xvreplgr2vr_w( + // CHECK-NEXT: entry: + // CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvreplgr2vr.w(i32 _1:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: store <8 x i32> TMP0, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvreplgr2vr_w(int _1) { return __builtin_lasx_xvreplgr2vr_w(_1); } + // CHECK-LABEL: @xvreplgr2vr_d( + // CHECK-NEXT: entry: + // CHECK-NEXT: CONV:%.* = sext i32 _1:%.* to i64 + // CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvreplgr2vr.d(i64 CONV) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: store <4 x i64> TMP0, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvreplgr2vr_d(int _1) { return __builtin_lasx_xvreplgr2vr_d(_1); } + // CHECK-LABEL: @xvpcnt_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvpcnt.b(<32 x i8> _1:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvpcnt.b(<32 x i8> _1) ++// CHECK-NEXT: store <32 x i8> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvpcnt_b(v32i8 _1) { return __builtin_lasx_xvpcnt_b(_1); } + // CHECK-LABEL: @xvpcnt_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvpcnt.h(<16 x i16> _1:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvpcnt.h(<16 x i16> _1) ++// CHECK-NEXT: store <16 x i16> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvpcnt_h(v16i16 _1) { return __builtin_lasx_xvpcnt_h(_1); } + // CHECK-LABEL: @xvpcnt_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvpcnt.w(<8 x i32> _1:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvpcnt.w(<8 x i32> _1) ++// 
CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvpcnt_w(v8i32 _1) { return __builtin_lasx_xvpcnt_w(_1); } + // CHECK-LABEL: @xvpcnt_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvpcnt.d(<4 x i64> _1:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvpcnt.d(<4 x i64> _1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvpcnt_d(v4i64 _1) { return __builtin_lasx_xvpcnt_d(_1); } + // CHECK-LABEL: @xvclo_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvclo.b(<32 x i8> _1:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvclo.b(<32 x i8> _1) ++// CHECK-NEXT: store <32 x i8> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvclo_b(v32i8 _1) { return __builtin_lasx_xvclo_b(_1); } + // CHECK-LABEL: @xvclo_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvclo.h(<16 x i16> _1:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvclo.h(<16 x i16> _1) ++// CHECK-NEXT: store <16 x i16> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvclo_h(v16i16 _1) { return __builtin_lasx_xvclo_h(_1); } + // CHECK-LABEL: @xvclo_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvclo.w(<8 x i32> _1:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvclo.w(<8 x i32> _1) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvclo_w(v8i32 _1) { return __builtin_lasx_xvclo_w(_1); } + // CHECK-LABEL: @xvclo_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvclo.d(<4 x i64> _1:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvclo.d(<4 x i64> _1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvclo_d(v4i64 _1) { return __builtin_lasx_xvclo_d(_1); } + // CHECK-LABEL: @xvclz_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvclz.b(<32 x i8> _1:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvclz.b(<32 x i8> _1) ++// CHECK-NEXT: store <32 x i8> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvclz_b(v32i8 _1) { return __builtin_lasx_xvclz_b(_1); } + // CHECK-LABEL: @xvclz_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvclz.h(<16 x i16> _1:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// 
CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvclz.h(<16 x i16> _1) ++// CHECK-NEXT: store <16 x i16> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvclz_h(v16i16 _1) { return __builtin_lasx_xvclz_h(_1); } + // CHECK-LABEL: @xvclz_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvclz.w(<8 x i32> _1:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvclz.w(<8 x i32> _1) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvclz_w(v8i32 _1) { return __builtin_lasx_xvclz_w(_1); } + // CHECK-LABEL: @xvclz_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvclz.d(<4 x i64> _1:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvclz.d(<4 x i64> _1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvclz_d(v4i64 _1) { return __builtin_lasx_xvclz_d(_1); } + // CHECK-LABEL: @xvfadd_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfadd.s(<8 x float> _1:%.*, <8 x float> _2:%.*) +-// CHECK-NEXT: ret <8 x float> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x float>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfadd.s(<8 x float> _1, <8 x float> _2) ++// CHECK-NEXT: store <8 x float> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8f32 xvfadd_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfadd_s(_1, _2); } + // CHECK-LABEL: @xvfadd_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfadd.d(<4 x double> _1:%.*, <4 x double> _2:%.*) +-// CHECK-NEXT: ret <4 x double> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x double>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfadd.d(<4 x double> _1, <4 x double> _2) ++// CHECK-NEXT: store <4 x double> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4f64 xvfadd_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfadd_d(_1, _2); } + // CHECK-LABEL: @xvfsub_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfsub.s(<8 x float> _1:%.*, <8 x float> _2:%.*) +-// CHECK-NEXT: ret <8 x float> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x float>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfsub.s(<8 x float> _1, <8 x float> _2) ++// CHECK-NEXT: store <8 x float> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8f32 xvfsub_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfsub_s(_1, _2); } + // CHECK-LABEL: @xvfsub_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x double> 
@llvm.loongarch.lasx.xvfsub.d(<4 x double> _1:%.*, <4 x double> _2:%.*) +-// CHECK-NEXT: ret <4 x double> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x double>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfsub.d(<4 x double> _1, <4 x double> _2) ++// CHECK-NEXT: store <4 x double> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4f64 xvfsub_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfsub_d(_1, _2); } + // CHECK-LABEL: @xvfmul_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfmul.s(<8 x float> _1:%.*, <8 x float> _2:%.*) +-// CHECK-NEXT: ret <8 x float> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x float>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfmul.s(<8 x float> _1, <8 x float> _2) ++// CHECK-NEXT: store <8 x float> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8f32 xvfmul_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfmul_s(_1, _2); } + // CHECK-LABEL: @xvfmul_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfmul.d(<4 x double> _1:%.*, <4 x double> _2:%.*) +-// CHECK-NEXT: ret <4 x double> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x double>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfmul.d(<4 x double> _1, <4 x double> _2) ++// CHECK-NEXT: store <4 x double> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4f64 xvfmul_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfmul_d(_1, _2); } + // CHECK-LABEL: @xvfdiv_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfdiv.s(<8 x float> _1:%.*, <8 x float> _2:%.*) +-// CHECK-NEXT: ret <8 x float> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x float>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfdiv.s(<8 x float> _1, <8 x float> _2) ++// CHECK-NEXT: store <8 x float> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8f32 xvfdiv_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfdiv_s(_1, _2); } + // CHECK-LABEL: @xvfdiv_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfdiv.d(<4 x double> _1:%.*, <4 x double> _2:%.*) +-// CHECK-NEXT: ret <4 x double> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x double>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfdiv.d(<4 x double> _1, <4 x double> _2) ++// CHECK-NEXT: store <4 x double> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4f64 xvfdiv_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfdiv_d(_1, _2); } + // CHECK-LABEL: @xvfcvt_h_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvfcvt.h.s(<8 x float> _1:%.*, <8 x float> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = 
load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x float>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvfcvt.h.s(<8 x float> _1, <8 x float> _2) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvfcvt_h_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcvt_h_s(_1, _2); } + // CHECK-LABEL: @xvfcvt_s_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfcvt.s.d(<4 x double> _1:%.*, <4 x double> _2:%.*) +-// CHECK-NEXT: ret <8 x float> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x double>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfcvt.s.d(<4 x double> _1, <4 x double> _2) ++// CHECK-NEXT: store <8 x float> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8f32 xvfcvt_s_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcvt_s_d(_1, _2); } + // CHECK-LABEL: @xvfmin_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfmin.s(<8 x float> _1:%.*, <8 x float> _2:%.*) +-// CHECK-NEXT: ret <8 x float> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x float>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfmin.s(<8 x float> _1, <8 x float> _2) ++// CHECK-NEXT: store <8 x float> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8f32 xvfmin_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfmin_s(_1, _2); } + // CHECK-LABEL: @xvfmin_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfmin.d(<4 x double> _1:%.*, <4 x double> _2:%.*) +-// CHECK-NEXT: ret <4 x double> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x double>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfmin.d(<4 x double> _1, <4 x double> _2) ++// CHECK-NEXT: store <4 x double> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4f64 xvfmin_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfmin_d(_1, _2); } + // CHECK-LABEL: @xvfmina_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfmina.s(<8 x float> _1:%.*, <8 x float> _2:%.*) +-// CHECK-NEXT: ret <8 x float> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x float>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfmina.s(<8 x float> _1, <8 x float> _2) ++// CHECK-NEXT: store <8 x float> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8f32 xvfmina_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfmina_s(_1, _2); } + // CHECK-LABEL: @xvfmina_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfmina.d(<4 x double> _1:%.*, <4 x double> _2:%.*) +-// CHECK-NEXT: ret <4 x double> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x double>, ptr TMP1:%.*, align 
32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfmina.d(<4 x double> _1, <4 x double> _2) ++// CHECK-NEXT: store <4 x double> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4f64 xvfmina_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfmina_d(_1, _2); } + // CHECK-LABEL: @xvfmax_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfmax.s(<8 x float> _1:%.*, <8 x float> _2:%.*) +-// CHECK-NEXT: ret <8 x float> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x float>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfmax.s(<8 x float> _1, <8 x float> _2) ++// CHECK-NEXT: store <8 x float> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8f32 xvfmax_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfmax_s(_1, _2); } + // CHECK-LABEL: @xvfmax_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfmax.d(<4 x double> _1:%.*, <4 x double> _2:%.*) +-// CHECK-NEXT: ret <4 x double> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x double>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfmax.d(<4 x double> _1, <4 x double> _2) ++// CHECK-NEXT: store <4 x double> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4f64 xvfmax_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfmax_d(_1, _2); } + // CHECK-LABEL: @xvfmaxa_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfmaxa.s(<8 x float> _1:%.*, <8 x float> _2:%.*) +-// CHECK-NEXT: ret <8 x float> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x float>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfmaxa.s(<8 x float> _1, <8 x float> _2) ++// CHECK-NEXT: store <8 x float> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8f32 xvfmaxa_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfmaxa_s(_1, _2); } + // CHECK-LABEL: @xvfmaxa_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfmaxa.d(<4 x double> _1:%.*, <4 x double> _2:%.*) +-// CHECK-NEXT: ret <4 x double> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x double>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfmaxa.d(<4 x double> _1, <4 x double> _2) ++// CHECK-NEXT: store <4 x double> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4f64 xvfmaxa_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfmaxa_d(_1, _2); } + // CHECK-LABEL: @xvfclass_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfclass.s(<8 x float> _1:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfclass.s(<8 x float> _1) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + 
v8i32 xvfclass_s(v8f32 _1) { return __builtin_lasx_xvfclass_s(_1); } + // CHECK-LABEL: @xvfclass_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfclass.d(<4 x double> _1:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfclass.d(<4 x double> _1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvfclass_d(v4f64 _1) { return __builtin_lasx_xvfclass_d(_1); } + // CHECK-LABEL: @xvfsqrt_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfsqrt.s(<8 x float> _1:%.*) +-// CHECK-NEXT: ret <8 x float> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfsqrt.s(<8 x float> _1) ++// CHECK-NEXT: store <8 x float> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8f32 xvfsqrt_s(v8f32 _1) { return __builtin_lasx_xvfsqrt_s(_1); } + // CHECK-LABEL: @xvfsqrt_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfsqrt.d(<4 x double> _1:%.*) +-// CHECK-NEXT: ret <4 x double> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfsqrt.d(<4 x double> _1) ++// CHECK-NEXT: store <4 x double> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4f64 xvfsqrt_d(v4f64 _1) { return __builtin_lasx_xvfsqrt_d(_1); } + // CHECK-LABEL: @xvfrecip_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfrecip.s(<8 x float> _1:%.*) +-// CHECK-NEXT: ret <8 x float> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfrecip.s(<8 x float> _1) ++// CHECK-NEXT: store <8 x float> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8f32 xvfrecip_s(v8f32 _1) { return __builtin_lasx_xvfrecip_s(_1); } + // CHECK-LABEL: @xvfrecip_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfrecip.d(<4 x double> _1:%.*) +-// CHECK-NEXT: ret <4 x double> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfrecip.d(<4 x double> _1) ++// CHECK-NEXT: store <4 x double> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4f64 xvfrecip_d(v4f64 _1) { return __builtin_lasx_xvfrecip_d(_1); } + // CHECK-LABEL: @xvfrint_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfrint.s(<8 x float> _1:%.*) +-// CHECK-NEXT: ret <8 x float> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfrint.s(<8 x float> _1) ++// CHECK-NEXT: store <8 x float> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8f32 xvfrint_s(v8f32 _1) { return __builtin_lasx_xvfrint_s(_1); } + // CHECK-LABEL: @xvfrint_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfrint.d(<4 x 
double> _1:%.*) +-// CHECK-NEXT: ret <4 x double> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfrint.d(<4 x double> _1) ++// CHECK-NEXT: store <4 x double> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4f64 xvfrint_d(v4f64 _1) { return __builtin_lasx_xvfrint_d(_1); } + // CHECK-LABEL: @xvfrsqrt_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfrsqrt.s(<8 x float> _1:%.*) +-// CHECK-NEXT: ret <8 x float> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfrsqrt.s(<8 x float> _1) ++// CHECK-NEXT: store <8 x float> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8f32 xvfrsqrt_s(v8f32 _1) { return __builtin_lasx_xvfrsqrt_s(_1); } + // CHECK-LABEL: @xvfrsqrt_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfrsqrt.d(<4 x double> _1:%.*) +-// CHECK-NEXT: ret <4 x double> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfrsqrt.d(<4 x double> _1) ++// CHECK-NEXT: store <4 x double> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4f64 xvfrsqrt_d(v4f64 _1) { return __builtin_lasx_xvfrsqrt_d(_1); } + // CHECK-LABEL: @xvflogb_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvflogb.s(<8 x float> _1:%.*) +-// CHECK-NEXT: ret <8 x float> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvflogb.s(<8 x float> _1) ++// CHECK-NEXT: store <8 x float> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8f32 xvflogb_s(v8f32 _1) { return __builtin_lasx_xvflogb_s(_1); } + // CHECK-LABEL: @xvflogb_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvflogb.d(<4 x double> _1:%.*) +-// CHECK-NEXT: ret <4 x double> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvflogb.d(<4 x double> _1) ++// CHECK-NEXT: store <4 x double> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4f64 xvflogb_d(v4f64 _1) { return __builtin_lasx_xvflogb_d(_1); } + // CHECK-LABEL: @xvfcvth_s_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfcvth.s.h(<16 x i16> _1:%.*) +-// CHECK-NEXT: ret <8 x float> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfcvth.s.h(<16 x i16> _1) ++// CHECK-NEXT: store <8 x float> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8f32 xvfcvth_s_h(v16i16 _1) { return __builtin_lasx_xvfcvth_s_h(_1); } + // CHECK-LABEL: @xvfcvth_d_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfcvth.d.s(<8 x float> _1:%.*) +-// CHECK-NEXT: ret <4 x double> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x double> 
@llvm.loongarch.lasx.xvfcvth.d.s(<8 x float> _1) ++// CHECK-NEXT: store <4 x double> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4f64 xvfcvth_d_s(v8f32 _1) { return __builtin_lasx_xvfcvth_d_s(_1); } + // CHECK-LABEL: @xvfcvtl_s_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfcvtl.s.h(<16 x i16> _1:%.*) +-// CHECK-NEXT: ret <8 x float> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfcvtl.s.h(<16 x i16> _1) ++// CHECK-NEXT: store <8 x float> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8f32 xvfcvtl_s_h(v16i16 _1) { return __builtin_lasx_xvfcvtl_s_h(_1); } + // CHECK-LABEL: @xvfcvtl_d_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfcvtl.d.s(<8 x float> _1:%.*) +-// CHECK-NEXT: ret <4 x double> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfcvtl.d.s(<8 x float> _1) ++// CHECK-NEXT: store <4 x double> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4f64 xvfcvtl_d_s(v8f32 _1) { return __builtin_lasx_xvfcvtl_d_s(_1); } + // CHECK-LABEL: @xvftint_w_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.w.s(<8 x float> _1:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.w.s(<8 x float> _1) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvftint_w_s(v8f32 _1) { return __builtin_lasx_xvftint_w_s(_1); } + // CHECK-LABEL: @xvftint_l_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftint.l.d(<4 x double> _1:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftint.l.d(<4 x double> _1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvftint_l_d(v4f64 _1) { return __builtin_lasx_xvftint_l_d(_1); } + // CHECK-LABEL: @xvftint_wu_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.wu.s(<8 x float> _1:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.wu.s(<8 x float> _1) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8u32 xvftint_wu_s(v8f32 _1) { return __builtin_lasx_xvftint_wu_s(_1); } + // CHECK-LABEL: @xvftint_lu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftint.lu.d(<4 x double> _1:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftint.lu.d(<4 x double> _1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4u64 xvftint_lu_d(v4f64 
_1) { return __builtin_lasx_xvftint_lu_d(_1); } + // CHECK-LABEL: @xvftintrz_w_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.s(<8 x float> _1:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.s(<8 x float> _1) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvftintrz_w_s(v8f32 _1) { return __builtin_lasx_xvftintrz_w_s(_1); } + // CHECK-LABEL: @xvftintrz_l_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrz.l.d(<4 x double> _1:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrz.l.d(<4 x double> _1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvftintrz_l_d(v4f64 _1) { return __builtin_lasx_xvftintrz_l_d(_1); } + // CHECK-LABEL: @xvftintrz_wu_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.wu.s(<8 x float> _1:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.wu.s(<8 x float> _1) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8u32 xvftintrz_wu_s(v8f32 _1) { return __builtin_lasx_xvftintrz_wu_s(_1); } + // CHECK-LABEL: @xvftintrz_lu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrz.lu.d(<4 x double> _1:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrz.lu.d(<4 x double> _1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4u64 xvftintrz_lu_d(v4f64 _1) { return __builtin_lasx_xvftintrz_lu_d(_1); } + // CHECK-LABEL: @xvffint_s_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.w(<8 x i32> _1:%.*) +-// CHECK-NEXT: ret <8 x float> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.w(<8 x i32> _1) ++// CHECK-NEXT: store <8 x float> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8f32 xvffint_s_w(v8i32 _1) { return __builtin_lasx_xvffint_s_w(_1); } + // CHECK-LABEL: @xvffint_d_l( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvffint.d.l(<4 x i64> _1:%.*) +-// CHECK-NEXT: ret <4 x double> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvffint.d.l(<4 x i64> _1) ++// CHECK-NEXT: store <4 x double> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4f64 xvffint_d_l(v4i64 _1) { return __builtin_lasx_xvffint_d_l(_1); } + // CHECK-LABEL: @xvffint_s_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x float> 
@llvm.loongarch.lasx.xvffint.s.wu(<8 x i32> _1:%.*) +-// CHECK-NEXT: ret <8 x float> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.wu(<8 x i32> _1) ++// CHECK-NEXT: store <8 x float> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8f32 xvffint_s_wu(v8u32 _1) { return __builtin_lasx_xvffint_s_wu(_1); } + // CHECK-LABEL: @xvffint_d_lu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvffint.d.lu(<4 x i64> _1:%.*) +-// CHECK-NEXT: ret <4 x double> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvffint.d.lu(<4 x i64> _1) ++// CHECK-NEXT: store <4 x double> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4f64 xvffint_d_lu(v4u64 _1) { return __builtin_lasx_xvffint_d_lu(_1); } + // CHECK-LABEL: @xvreplve_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve.b(<32 x i8> _1:%.*, i32 _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve.b(<32 x i8> _1, i32 _2:%.*) ++// CHECK-NEXT: store <32 x i8> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvreplve_b(v32i8 _1, int _2) { return __builtin_lasx_xvreplve_b(_1, _2); } + // CHECK-LABEL: @xvreplve_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvreplve.h(<16 x i16> _1:%.*, i32 _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvreplve.h(<16 x i16> _1, i32 _2:%.*) ++// CHECK-NEXT: store <16 x i16> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvreplve_h(v16i16 _1, int _2) { return __builtin_lasx_xvreplve_h(_1, _2); } + // CHECK-LABEL: @xvreplve_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvreplve.w(<8 x i32> _1:%.*, i32 _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvreplve.w(<8 x i32> _1, i32 _2:%.*) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvreplve_w(v8i32 _1, int _2) { return __builtin_lasx_xvreplve_w(_1, _2); } + // CHECK-LABEL: @xvreplve_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvreplve.d(<4 x i64> _1:%.*, i32 _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvreplve.d(<4 x i64> _1, i32 _2:%.*) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvreplve_d(v4i64 _1, int _2) { return __builtin_lasx_xvreplve_d(_1, _2); } + // CHECK-LABEL: @xvpermi_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvpermi.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*, i32 1) +-// CHECK-NEXT: ret <8 x 
i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvpermi.w(<8 x i32> _1, <8 x i32> _2, i32 1) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvpermi_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvpermi_w(_1, _2, 1); } + // CHECK-LABEL: @xvandn_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvandn.v(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvandn.v(<32 x i8> _1, <32 x i8> _2) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32u8 xvandn_v(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvandn_v(_1, _2); } + // CHECK-LABEL: @xvneg_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvneg.b(<32 x i8> _1:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvneg.b(<32 x i8> _1) ++// CHECK-NEXT: store <32 x i8> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvneg_b(v32i8 _1) { return __builtin_lasx_xvneg_b(_1); } + // CHECK-LABEL: @xvneg_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvneg.h(<16 x i16> _1:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvneg.h(<16 x i16> _1) ++// CHECK-NEXT: store <16 x i16> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvneg_h(v16i16 _1) { return __builtin_lasx_xvneg_h(_1); } + // CHECK-LABEL: @xvneg_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvneg.w(<8 x i32> _1:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvneg.w(<8 x i32> _1) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvneg_w(v8i32 _1) { return __builtin_lasx_xvneg_w(_1); } + // CHECK-LABEL: @xvneg_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvneg.d(<4 x i64> _1:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvneg.d(<4 x i64> _1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvneg_d(v4i64 _1) { return __builtin_lasx_xvneg_d(_1); } + // CHECK-LABEL: @xvmuh_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmuh.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// 
CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmuh.b(<32 x i8> _1, <32 x i8> _2) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvmuh_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmuh_b(_1, _2); } + // CHECK-LABEL: @xvmuh_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmuh.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmuh.h(<16 x i16> _1, <16 x i16> _2) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvmuh_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmuh_h(_1, _2); } + // CHECK-LABEL: @xvmuh_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmuh.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmuh.w(<8 x i32> _1, <8 x i32> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvmuh_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmuh_w(_1, _2); } + // CHECK-LABEL: @xvmuh_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmuh.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmuh.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvmuh_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmuh_d(_1, _2); } + // CHECK-LABEL: @xvmuh_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmuh.bu(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmuh.bu(<32 x i8> _1, <32 x i8> _2) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32u8 xvmuh_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmuh_bu(_1, _2); } + // CHECK-LABEL: @xvmuh_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmuh.hu(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmuh.hu(<16 x i16> _1, <16 x i16> _2) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 
++// CHECK-NEXT: ret void + // + v16u16 xvmuh_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmuh_hu(_1, _2); } + // CHECK-LABEL: @xvmuh_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmuh.wu(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmuh.wu(<8 x i32> _1, <8 x i32> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8u32 xvmuh_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmuh_wu(_1, _2); } + // CHECK-LABEL: @xvmuh_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmuh.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmuh.du(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4u64 xvmuh_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmuh_du(_1, _2); } + // CHECK-LABEL: @xvsllwil_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b(<32 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsllwil.h.b(<32 x i8> _1, i32 1) ++// CHECK-NEXT: store <16 x i16> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvsllwil_h_b(v32i8 _1) { return __builtin_lasx_xvsllwil_h_b(_1, 1); } + // CHECK-LABEL: @xvsllwil_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h(<16 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsllwil.w.h(<16 x i16> _1, i32 1) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvsllwil_w_h(v16i16 _1) { return __builtin_lasx_xvsllwil_w_h(_1, 1); } + // CHECK-LABEL: @xvsllwil_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsllwil.d.w(<8 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsllwil.d.w(<8 x i32> _1, i32 1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvsllwil_d_w(v8i32 _1) { return __builtin_lasx_xvsllwil_d_w(_1, 1); } + // CHECK-LABEL: @xvsllwil_hu_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu(<32 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsllwil.hu.bu(<32 x i8> _1, i32 1) ++// 
CHECK-NEXT: store <16 x i16> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16u16 xvsllwil_hu_bu(v32u8 _1) { return __builtin_lasx_xvsllwil_hu_bu(_1, 1); } + // CHECK-LABEL: @xvsllwil_wu_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu(<16 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsllwil.wu.hu(<16 x i16> _1, i32 1) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8u32 xvsllwil_wu_hu(v16u16 _1) { return __builtin_lasx_xvsllwil_wu_hu(_1, 1); } + // CHECK-LABEL: @xvsllwil_du_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu(<8 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsllwil.du.wu(<8 x i32> _1, i32 1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4u64 xvsllwil_du_wu(v8u32 _1) { return __builtin_lasx_xvsllwil_du_wu(_1, 1); } + // CHECK-LABEL: @xvsran_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsran.b.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsran.b.h(<16 x i16> _1, <16 x i16> _2) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvsran_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsran_b_h(_1, _2); } + // CHECK-LABEL: @xvsran_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsran.h.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsran.h.w(<8 x i32> _1, <8 x i32> _2) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvsran_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsran_h_w(_1, _2); } + // CHECK-LABEL: @xvsran_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsran.w.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsran.w.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvsran_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsran_w_d(_1, _2); } + // CHECK-LABEL: @xvssran_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssran.b.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// 
CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssran.b.h(<16 x i16> _1, <16 x i16> _2) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvssran_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssran_b_h(_1, _2); } + // CHECK-LABEL: @xvssran_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssran.h.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssran.h.w(<8 x i32> _1, <8 x i32> _2) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvssran_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssran_h_w(_1, _2); } + // CHECK-LABEL: @xvssran_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssran.w.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssran.w.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvssran_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssran_w_d(_1, _2); } + // CHECK-LABEL: @xvssran_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssran.bu.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssran.bu.h(<16 x i16> _1, <16 x i16> _2) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32u8 xvssran_bu_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvssran_bu_h(_1, _2); } + // CHECK-LABEL: @xvssran_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssran.hu.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssran.hu.w(<8 x i32> _1, <8 x i32> _2) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16u16 xvssran_hu_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvssran_hu_w(_1, _2); } + // CHECK-LABEL: @xvssran_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssran.wu.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, 
ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssran.wu.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8u32 xvssran_wu_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvssran_wu_d(_1, _2); } + // CHECK-LABEL: @xvsrarn_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsrarn.b.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsrarn.b.h(<16 x i16> _1, <16 x i16> _2) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvsrarn_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrarn_b_h(_1, _2); } + // CHECK-LABEL: @xvsrarn_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsrarn.h.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsrarn.h.w(<8 x i32> _1, <8 x i32> _2) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvsrarn_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrarn_h_w(_1, _2); } + // CHECK-LABEL: @xvsrarn_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsrarn.w.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsrarn.w.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvsrarn_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrarn_w_d(_1, _2); } + // CHECK-LABEL: @xvssrarn_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarn.b.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarn.b.h(<16 x i16> _1, <16 x i16> _2) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvssrarn_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrarn_b_h(_1, _2); } + // CHECK-LABEL: @xvssrarn_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarn.h.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarn.h.w(<8 x i32> _1, <8 x i32> _2) ++// 
CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvssrarn_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrarn_h_w(_1, _2); } + // CHECK-LABEL: @xvssrarn_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarn.w.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarn.w.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvssrarn_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrarn_w_d(_1, _2); } + // CHECK-LABEL: @xvssrarn_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarn.bu.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarn.bu.h(<16 x i16> _1, <16 x i16> _2) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32u8 xvssrarn_bu_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvssrarn_bu_h(_1, _2); } + // CHECK-LABEL: @xvssrarn_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarn.hu.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarn.hu.w(<8 x i32> _1, <8 x i32> _2) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16u16 xvssrarn_hu_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvssrarn_hu_w(_1, _2); } + // CHECK-LABEL: @xvssrarn_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarn.wu.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarn.wu.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8u32 xvssrarn_wu_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvssrarn_wu_d(_1, _2); } + // CHECK-LABEL: @xvsrln_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsrln.b.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsrln.b.h(<16 x i16> _1, <16 x i16> _2) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvsrln_b_h(v16i16 
_1, v16i16 _2) { return __builtin_lasx_xvsrln_b_h(_1, _2); } + // CHECK-LABEL: @xvsrln_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsrln.h.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsrln.h.w(<8 x i32> _1, <8 x i32> _2) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvsrln_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrln_h_w(_1, _2); } + // CHECK-LABEL: @xvsrln_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsrln.w.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsrln.w.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvsrln_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrln_w_d(_1, _2); } + // CHECK-LABEL: @xvssrln_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssrln.bu.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssrln.bu.h(<16 x i16> _1, <16 x i16> _2) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32u8 xvssrln_bu_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvssrln_bu_h(_1, _2); } + // CHECK-LABEL: @xvssrln_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssrln.hu.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssrln.hu.w(<8 x i32> _1, <8 x i32> _2) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16u16 xvssrln_hu_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvssrln_hu_w(_1, _2); } + // CHECK-LABEL: @xvssrln_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssrln.wu.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssrln.wu.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8u32 xvssrln_wu_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvssrln_wu_d(_1, _2); } + // CHECK-LABEL: @xvsrlrn_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> 
@llvm.loongarch.lasx.xvsrlrn.b.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlrn.b.h(<16 x i16> _1, <16 x i16> _2) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvsrlrn_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrlrn_b_h(_1, _2); } + // CHECK-LABEL: @xvsrlrn_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlrn.h.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlrn.h.w(<8 x i32> _1, <8 x i32> _2) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvsrlrn_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrlrn_h_w(_1, _2); } + // CHECK-LABEL: @xvsrlrn_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlrn.w.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlrn.w.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvsrlrn_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrlrn_w_d(_1, _2); } + // CHECK-LABEL: @xvssrlrn_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.bu.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.bu.h(<16 x i16> _1, <16 x i16> _2) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32u8 xvssrlrn_bu_h(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvssrlrn_bu_h(_1, _2); } + // CHECK-LABEL: @xvssrlrn_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.hu.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.hu.w(<8 x i32> _1, <8 x i32> _2) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16u16 xvssrlrn_hu_w(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvssrlrn_hu_w(_1, _2); } + // CHECK-LABEL: @xvssrlrn_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.wu.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 
x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.wu.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8u32 xvssrlrn_wu_d(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvssrlrn_wu_d(_1, _2); } + // CHECK-LABEL: @xvfrstpi_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvfrstpi.b(<32 x i8> _1, <32 x i8> _2, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvfrstpi_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvfrstpi_b(_1, _2, 1); } + // CHECK-LABEL: @xvfrstpi_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvfrstpi.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvfrstpi.h(<16 x i16> _1, <16 x i16> _2, i32 1) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvfrstpi_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvfrstpi_h(_1, _2, 1); } + // CHECK-LABEL: @xvfrstp_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvfrstp.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*, <32 x i8> _3:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _3:%.* = load <32 x i8>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvfrstp.b(<32 x i8> _1, <32 x i8> _2, <32 x i8> _3) ++// CHECK-NEXT: store <32 x i8> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvfrstp_b(v32i8 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvfrstp_b(_1, _2, _3); } + // CHECK-LABEL: @xvfrstp_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvfrstp.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*, <16 x i16> _3:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _3:%.* = load <16 x i16>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvfrstp.h(<16 x i16> _1, <16 x i16> _2, <16 x i16> _3) ++// CHECK-NEXT: store <16 x i16> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvfrstp_h(v16i16 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvfrstp_h(_1, _2, _3); } + // CHECK-LABEL: @xvshuf4i_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d(<4 x i64> 
_1:%.*, <4 x i64> _2:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvshuf4i.d(<4 x i64> _1, <4 x i64> _2, i32 1) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvshuf4i_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvshuf4i_d(_1, _2, 1); } + // CHECK-LABEL: @xvbsrl_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvbsrl.v(<32 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvbsrl.v(<32 x i8> _1, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvbsrl_v(v32i8 _1) { return __builtin_lasx_xvbsrl_v(_1, 1); } + // CHECK-LABEL: @xvbsll_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvbsll.v(<32 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvbsll.v(<32 x i8> _1, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvbsll_v(v32i8 _1) { return __builtin_lasx_xvbsll_v(_1, 1); } + // CHECK-LABEL: @xvextrins_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvextrins.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvextrins.b(<32 x i8> _1, <32 x i8> _2, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvextrins_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvextrins_b(_1, _2, 1); } + // CHECK-LABEL: @xvextrins_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvextrins.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvextrins.h(<16 x i16> _1, <16 x i16> _2, i32 1) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvextrins_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvextrins_h(_1, _2, 1); } + // CHECK-LABEL: @xvextrins_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvextrins.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvextrins.w(<8 x i32> _1, <8 x i32> _2, i32 1) ++// CHECK-NEXT: store <8 x 
i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvextrins_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvextrins_w(_1, _2, 1); } + // CHECK-LABEL: @xvextrins_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvextrins.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvextrins.d(<4 x i64> _1, <4 x i64> _2, i32 1) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvextrins_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvextrins_d(_1, _2, 1); } + // CHECK-LABEL: @xvmskltz_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmskltz.b(<32 x i8> _1:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmskltz.b(<32 x i8> _1) ++// CHECK-NEXT: store <32 x i8> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvmskltz_b(v32i8 _1) { return __builtin_lasx_xvmskltz_b(_1); } + // CHECK-LABEL: @xvmskltz_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmskltz.h(<16 x i16> _1:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmskltz.h(<16 x i16> _1) ++// CHECK-NEXT: store <16 x i16> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvmskltz_h(v16i16 _1) { return __builtin_lasx_xvmskltz_h(_1); } + // CHECK-LABEL: @xvmskltz_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmskltz.w(<8 x i32> _1:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmskltz.w(<8 x i32> _1) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvmskltz_w(v8i32 _1) { return __builtin_lasx_xvmskltz_w(_1); } + // CHECK-LABEL: @xvmskltz_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmskltz.d(<4 x i64> _1:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmskltz.d(<4 x i64> _1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvmskltz_d(v4i64 _1) { return __builtin_lasx_xvmskltz_d(_1); } + // CHECK-LABEL: @xvsigncov_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsigncov.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsigncov.b(<32 x i8> _1, <32 x i8> _2) ++// CHECK-NEXT: store <32 x 
i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvsigncov_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsigncov_b(_1, _2); } + // CHECK-LABEL: @xvsigncov_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsigncov.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsigncov.h(<16 x i16> _1, <16 x i16> _2) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvsigncov_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsigncov_h(_1, _2); } + // CHECK-LABEL: @xvsigncov_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsigncov.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsigncov.w(<8 x i32> _1, <8 x i32> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvsigncov_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsigncov_w(_1, _2); } + // CHECK-LABEL: @xvsigncov_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsigncov.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsigncov.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvsigncov_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsigncov_d(_1, _2); } + // CHECK-LABEL: @xvfmadd_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfmadd.s(<8 x float> _1:%.*, <8 x float> _2:%.*, <8 x float> _3:%.*) +-// CHECK-NEXT: ret <8 x float> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x float>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _3:%.* = load <8 x float>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfmadd.s(<8 x float> _1, <8 x float> _2, <8 x float> _3) ++// CHECK-NEXT: store <8 x float> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8f32 xvfmadd_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __builtin_lasx_xvfmadd_s(_1, _2, _3); } + // CHECK-LABEL: @xvfmadd_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfmadd.d(<4 x double> _1:%.*, <4 x double> _2:%.*, <4 x double> _3:%.*) +-// CHECK-NEXT: ret <4 x double> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x double>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _3:%.* = load <4 x double>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <4 x double> 
@llvm.loongarch.lasx.xvfmadd.d(<4 x double> _1, <4 x double> _2, <4 x double> _3) ++// CHECK-NEXT: store <4 x double> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4f64 xvfmadd_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __builtin_lasx_xvfmadd_d(_1, _2, _3); } + // CHECK-LABEL: @xvfmsub_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfmsub.s(<8 x float> _1:%.*, <8 x float> _2:%.*, <8 x float> _3:%.*) +-// CHECK-NEXT: ret <8 x float> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x float>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _3:%.* = load <8 x float>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfmsub.s(<8 x float> _1, <8 x float> _2, <8 x float> _3) ++// CHECK-NEXT: store <8 x float> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8f32 xvfmsub_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __builtin_lasx_xvfmsub_s(_1, _2, _3); } + // CHECK-LABEL: @xvfmsub_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfmsub.d(<4 x double> _1:%.*, <4 x double> _2:%.*, <4 x double> _3:%.*) +-// CHECK-NEXT: ret <4 x double> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x double>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _3:%.* = load <4 x double>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfmsub.d(<4 x double> _1, <4 x double> _2, <4 x double> _3) ++// CHECK-NEXT: store <4 x double> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4f64 xvfmsub_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __builtin_lasx_xvfmsub_d(_1, _2, _3); } + // CHECK-LABEL: @xvfnmadd_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfnmadd.s(<8 x float> _1:%.*, <8 x float> _2:%.*, <8 x float> _3:%.*) +-// CHECK-NEXT: ret <8 x float> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x float>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _3:%.* = load <8 x float>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfnmadd.s(<8 x float> _1, <8 x float> _2, <8 x float> _3) ++// CHECK-NEXT: store <8 x float> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8f32 xvfnmadd_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __builtin_lasx_xvfnmadd_s(_1, _2, _3); } + // CHECK-LABEL: @xvfnmadd_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfnmadd.d(<4 x double> _1:%.*, <4 x double> _2:%.*, <4 x double> _3:%.*) +-// CHECK-NEXT: ret <4 x double> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x double>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _3:%.* = load <4 x double>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfnmadd.d(<4 x double> _1, <4 x double> _2, <4 x double> _3) ++// CHECK-NEXT: store <4 x double> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4f64 xvfnmadd_d(v4f64 _1, v4f64 _2, v4f64 _3) { return 
__builtin_lasx_xvfnmadd_d(_1, _2, _3); } + // CHECK-LABEL: @xvfnmsub_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfnmsub.s(<8 x float> _1:%.*, <8 x float> _2:%.*, <8 x float> _3:%.*) +-// CHECK-NEXT: ret <8 x float> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x float>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _3:%.* = load <8 x float>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfnmsub.s(<8 x float> _1, <8 x float> _2, <8 x float> _3) ++// CHECK-NEXT: store <8 x float> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8f32 xvfnmsub_s(v8f32 _1, v8f32 _2, v8f32 _3) { return __builtin_lasx_xvfnmsub_s(_1, _2, _3); } + // CHECK-LABEL: @xvfnmsub_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfnmsub.d(<4 x double> _1:%.*, <4 x double> _2:%.*, <4 x double> _3:%.*) +-// CHECK-NEXT: ret <4 x double> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x double>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _3:%.* = load <4 x double>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfnmsub.d(<4 x double> _1, <4 x double> _2, <4 x double> _3) ++// CHECK-NEXT: store <4 x double> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4f64 xvfnmsub_d(v4f64 _1, v4f64 _2, v4f64 _3) { return __builtin_lasx_xvfnmsub_d(_1, _2, _3); } + // CHECK-LABEL: @xvftintrne_w_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.s(<8 x float> _1:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.s(<8 x float> _1) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvftintrne_w_s(v8f32 _1) { return __builtin_lasx_xvftintrne_w_s(_1); } + // CHECK-LABEL: @xvftintrne_l_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrne.l.d(<4 x double> _1:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrne.l.d(<4 x double> _1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvftintrne_l_d(v4f64 _1) { return __builtin_lasx_xvftintrne_l_d(_1); } + // CHECK-LABEL: @xvftintrp_w_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.s(<8 x float> _1:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.s(<8 x float> _1) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvftintrp_w_s(v8f32 _1) { return __builtin_lasx_xvftintrp_w_s(_1); } + // CHECK-LABEL: @xvftintrp_l_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrp.l.d(<4 x double> 
_1:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrp.l.d(<4 x double> _1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvftintrp_l_d(v4f64 _1) { return __builtin_lasx_xvftintrp_l_d(_1); } + // CHECK-LABEL: @xvftintrm_w_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.s(<8 x float> _1:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.s(<8 x float> _1) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvftintrm_w_s(v8f32 _1) { return __builtin_lasx_xvftintrm_w_s(_1); } + // CHECK-LABEL: @xvftintrm_l_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrm.l.d(<4 x double> _1:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrm.l.d(<4 x double> _1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvftintrm_l_d(v4f64 _1) { return __builtin_lasx_xvftintrm_l_d(_1); } + // CHECK-LABEL: @xvftint_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.w.d(<4 x double> _1:%.*, <4 x double> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x double>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvftint.w.d(<4 x double> _1, <4 x double> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvftint_w_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvftint_w_d(_1, _2); } + // CHECK-LABEL: @xvffint_s_l( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.l(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <8 x float> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvffint.s.l(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <8 x float> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8f32 xvffint_s_l(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvffint_s_l(_1, _2); } + // CHECK-LABEL: @xvftintrz_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.d(<4 x double> _1:%.*, <4 x double> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x double>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrz.w.d(<4 x double> _1, <4 x double> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 
xvftintrz_w_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvftintrz_w_d(_1, _2); } + // CHECK-LABEL: @xvftintrp_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.d(<4 x double> _1:%.*, <4 x double> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x double>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrp.w.d(<4 x double> _1, <4 x double> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvftintrp_w_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvftintrp_w_d(_1, _2); } + // CHECK-LABEL: @xvftintrm_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.d(<4 x double> _1:%.*, <4 x double> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x double>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrm.w.d(<4 x double> _1, <4 x double> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvftintrm_w_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvftintrm_w_d(_1, _2); } + // CHECK-LABEL: @xvftintrne_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.d(<4 x double> _1:%.*, <4 x double> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x double>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvftintrne.w.d(<4 x double> _1, <4 x double> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvftintrne_w_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvftintrne_w_d(_1, _2); } + // CHECK-LABEL: @xvftinth_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftinth.l.s(<8 x float> _1:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftinth.l.s(<8 x float> _1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvftinth_l_s(v8f32 _1) { return __builtin_lasx_xvftinth_l_s(_1); } + // CHECK-LABEL: @xvftintl_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftintl.l.s(<8 x float> _1:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftintl.l.s(<8 x float> _1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvftintl_l_s(v8f32 _1) { return __builtin_lasx_xvftintl_l_s(_1); } + // CHECK-LABEL: @xvffinth_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvffinth.d.w(<8 x i32> _1:%.*) +-// CHECK-NEXT: ret <4 x double> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, 
ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvffinth.d.w(<8 x i32> _1) ++// CHECK-NEXT: store <4 x double> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4f64 xvffinth_d_w(v8i32 _1) { return __builtin_lasx_xvffinth_d_w(_1); } + // CHECK-LABEL: @xvffintl_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvffintl.d.w(<8 x i32> _1:%.*) +-// CHECK-NEXT: ret <4 x double> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvffintl.d.w(<8 x i32> _1) ++// CHECK-NEXT: store <4 x double> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4f64 xvffintl_d_w(v8i32 _1) { return __builtin_lasx_xvffintl_d_w(_1); } + // CHECK-LABEL: @xvftintrzh_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrzh.l.s(<8 x float> _1:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrzh.l.s(<8 x float> _1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvftintrzh_l_s(v8f32 _1) { return __builtin_lasx_xvftintrzh_l_s(_1); } + // CHECK-LABEL: @xvftintrzl_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrzl.l.s(<8 x float> _1:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrzl.l.s(<8 x float> _1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvftintrzl_l_s(v8f32 _1) { return __builtin_lasx_xvftintrzl_l_s(_1); } + // CHECK-LABEL: @xvftintrph_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrph.l.s(<8 x float> _1:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrph.l.s(<8 x float> _1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvftintrph_l_s(v8f32 _1) { return __builtin_lasx_xvftintrph_l_s(_1); } + // CHECK-LABEL: @xvftintrpl_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrpl.l.s(<8 x float> _1:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrpl.l.s(<8 x float> _1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvftintrpl_l_s(v8f32 _1) { return __builtin_lasx_xvftintrpl_l_s(_1); } + // CHECK-LABEL: @xvftintrmh_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrmh.l.s(<8 x float> _1:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrmh.l.s(<8 x float> _1) ++// 
CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvftintrmh_l_s(v8f32 _1) { return __builtin_lasx_xvftintrmh_l_s(_1); } + // CHECK-LABEL: @xvftintrml_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrml.l.s(<8 x float> _1:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrml.l.s(<8 x float> _1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvftintrml_l_s(v8f32 _1) { return __builtin_lasx_xvftintrml_l_s(_1); } + // CHECK-LABEL: @xvftintrneh_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrneh.l.s(<8 x float> _1:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrneh.l.s(<8 x float> _1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvftintrneh_l_s(v8f32 _1) { return __builtin_lasx_xvftintrneh_l_s(_1); } + // CHECK-LABEL: @xvftintrnel_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrnel.l.s(<8 x float> _1:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvftintrnel.l.s(<8 x float> _1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvftintrnel_l_s(v8f32 _1) { return __builtin_lasx_xvftintrnel_l_s(_1); } + // CHECK-LABEL: @xvfrintrne_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrne.s(<8 x float> _1:%.*) +-// CHECK-NEXT: TMP1:%.* = bitcast <8 x float> TMP0 to <8 x i32> +-// CHECK-NEXT: ret <8 x i32> TMP1 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrne.s(<8 x float> _1) ++// CHECK-NEXT: store <8 x float> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvfrintrne_s(v8f32 _1) { return __builtin_lasx_xvfrintrne_s(_1); } + // CHECK-LABEL: @xvfrintrne_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrne.d(<4 x double> _1:%.*) +-// CHECK-NEXT: TMP1:%.* = bitcast <4 x double> TMP0 to <4 x i64> +-// CHECK-NEXT: ret <4 x i64> TMP1 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrne.d(<4 x double> _1) ++// CHECK-NEXT: store <4 x double> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvfrintrne_d(v4f64 _1) { return __builtin_lasx_xvfrintrne_d(_1); } + // CHECK-LABEL: @xvfrintrz_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrz.s(<8 x float> _1:%.*) +-// CHECK-NEXT: TMP1:%.* = bitcast <8 x float> TMP0 to <8 x i32> +-// CHECK-NEXT: ret <8 x i32> TMP1 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail 
call <8 x float> @llvm.loongarch.lasx.xvfrintrz.s(<8 x float> _1) ++// CHECK-NEXT: store <8 x float> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvfrintrz_s(v8f32 _1) { return __builtin_lasx_xvfrintrz_s(_1); } + // CHECK-LABEL: @xvfrintrz_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrz.d(<4 x double> _1:%.*) +-// CHECK-NEXT: TMP1:%.* = bitcast <4 x double> TMP0 to <4 x i64> +-// CHECK-NEXT: ret <4 x i64> TMP1 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrz.d(<4 x double> _1) ++// CHECK-NEXT: store <4 x double> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvfrintrz_d(v4f64 _1) { return __builtin_lasx_xvfrintrz_d(_1); } + // CHECK-LABEL: @xvfrintrp_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrp.s(<8 x float> _1:%.*) +-// CHECK-NEXT: TMP1:%.* = bitcast <8 x float> TMP0 to <8 x i32> +-// CHECK-NEXT: ret <8 x i32> TMP1 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrp.s(<8 x float> _1) ++// CHECK-NEXT: store <8 x float> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvfrintrp_s(v8f32 _1) { return __builtin_lasx_xvfrintrp_s(_1); } + // CHECK-LABEL: @xvfrintrp_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrp.d(<4 x double> _1:%.*) +-// CHECK-NEXT: TMP1:%.* = bitcast <4 x double> TMP0 to <4 x i64> +-// CHECK-NEXT: ret <4 x i64> TMP1 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrp.d(<4 x double> _1) ++// CHECK-NEXT: store <4 x double> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvfrintrp_d(v4f64 _1) { return __builtin_lasx_xvfrintrp_d(_1); } + // CHECK-LABEL: @xvfrintrm_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrm.s(<8 x float> _1:%.*) +-// CHECK-NEXT: TMP1:%.* = bitcast <8 x float> TMP0 to <8 x i32> +-// CHECK-NEXT: ret <8 x i32> TMP1 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvfrintrm.s(<8 x float> _1) ++// CHECK-NEXT: store <8 x float> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvfrintrm_s(v8f32 _1) { return __builtin_lasx_xvfrintrm_s(_1); } + // CHECK-LABEL: @xvfrintrm_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrm.d(<4 x double> _1:%.*) +-// CHECK-NEXT: TMP1:%.* = bitcast <4 x double> TMP0 to <4 x i64> +-// CHECK-NEXT: ret <4 x i64> TMP1 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvfrintrm.d(<4 x double> _1) ++// CHECK-NEXT: store <4 x double> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvfrintrm_d(v4f64 _1) { return __builtin_lasx_xvfrintrm_d(_1); } + // CHECK-LABEL: @xvld( + // CHECK-NEXT: entry: + // CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvld(ptr _1:%.*, i32 1) 
+-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: store <32 x i8> TMP0, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvld(void *_1) { return __builtin_lasx_xvld(_1, 1); } + // CHECK-LABEL: @xvst( + // CHECK-NEXT: entry: +-// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvst(<32 x i8> _1:%.*, ptr _2:%.*, i32 1) ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvst(<32 x i8> _1, ptr _2:%.*, i32 1) + // CHECK-NEXT: ret void + // + void xvst(v32i8 _1, void *_2) { return __builtin_lasx_xvst(_1, _2, 1); } + // CHECK-LABEL: @xvstelm_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.b(<32 x i8> _1:%.*, ptr _2:%.*, i32 1, i32 1) ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.b(<32 x i8> _1, ptr _2:%.*, i32 1, i32 1) + // CHECK-NEXT: ret void + // + void xvstelm_b(v32i8 _1, void * _2) { return __builtin_lasx_xvstelm_b(_1, _2, 1, 1); } + // CHECK-LABEL: @xvstelm_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.h(<16 x i16> _1:%.*, ptr _2:%.*, i32 2, i32 1) ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.h(<16 x i16> _1, ptr _2:%.*, i32 2, i32 1) + // CHECK-NEXT: ret void + // + void xvstelm_h(v16i16 _1, void * _2) { return __builtin_lasx_xvstelm_h(_1, _2, 2, 1); } + // CHECK-LABEL: @xvstelm_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.w(<8 x i32> _1:%.*, ptr _2:%.*, i32 4, i32 1) ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.w(<8 x i32> _1, ptr _2:%.*, i32 4, i32 1) + // CHECK-NEXT: ret void + // + void xvstelm_w(v8i32 _1, void * _2) { return __builtin_lasx_xvstelm_w(_1, _2, 4, 1); } + // CHECK-LABEL: @xvstelm_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.d(<4 x i64> _1:%.*, ptr _2:%.*, i32 8, i32 1) ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstelm.d(<4 x i64> _1, ptr _2:%.*, i32 8, i32 1) + // CHECK-NEXT: ret void + // + void xvstelm_d(v4i64 _1, void * _2) { return __builtin_lasx_xvstelm_d(_1, _2, 8, 1); } + // CHECK-LABEL: @xvinsve0_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvinsve0.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvinsve0.w(<8 x i32> _1, <8 x i32> _2, i32 1) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvinsve0_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvinsve0_w(_1, _2, 1); } + // CHECK-LABEL: @xvinsve0_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvinsve0.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: 
TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvinsve0.d(<4 x i64> _1, <4 x i64> _2, i32 1) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvinsve0_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvinsve0_d(_1, _2, 1); } + // CHECK-LABEL: @xvpickve_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvpickve.w(<8 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvpickve.w(<8 x i32> _1, i32 1) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvpickve_w(v8i32 _1) { return __builtin_lasx_xvpickve_w(_1, 1); } + // CHECK-LABEL: @xvpickve_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvpickve.d(<4 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvpickve.d(<4 x i64> _1, i32 1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvpickve_d(v4i64 _1) { return __builtin_lasx_xvpickve_d(_1, 1); } + // CHECK-LABEL: @xvssrlrn_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.b.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrn.b.h(<16 x i16> _1, <16 x i16> _2) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvssrlrn_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrlrn_b_h(_1, _2); } + // CHECK-LABEL: @xvssrlrn_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.h.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrn.h.w(<8 x i32> _1, <8 x i32> _2) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvssrlrn_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrlrn_h_w(_1, _2); } + // CHECK-LABEL: @xvssrlrn_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.w.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrn.w.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvssrlrn_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrlrn_w_d(_1, _2); } + // CHECK-LABEL: @xvssrln_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = 
tail call <32 x i8> @llvm.loongarch.lasx.xvssrln.b.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssrln.b.h(<16 x i16> _1, <16 x i16> _2) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvssrln_b_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrln_b_h(_1, _2); } + // CHECK-LABEL: @xvssrln_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssrln.h.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssrln.h.w(<8 x i32> _1, <8 x i32> _2) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvssrln_h_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrln_h_w(_1, _2); } + // CHECK-LABEL: @xvssrln_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssrln.w.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssrln.w.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvssrln_w_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrln_w_d(_1, _2); } + // CHECK-LABEL: @xvorn_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvorn.v(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvorn.v(<32 x i8> _1, <32 x i8> _2) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvorn_v(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvorn_v(_1, _2); } + // CHECK-LABEL: @xvldi( + // CHECK-NEXT: entry: + // CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvldi(i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: store <4 x i64> TMP0, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvldi() { return __builtin_lasx_xvldi(1); } + // CHECK-LABEL: @xvldx( + // CHECK-NEXT: entry: + // CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvldx(ptr _1:%.*, i64 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: store <32 x i8> TMP0, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvldx(void *_1) { return __builtin_lasx_xvldx(_1, 1); } + // CHECK-LABEL: @xvstx( + // CHECK-NEXT: entry: +-// CHECK-NEXT: tail call void @llvm.loongarch.lasx.xvstx(<32 x i8> _1:%.*, ptr _2:%.*, i64 1) ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: tail call void 
@llvm.loongarch.lasx.xvstx(<32 x i8> _1, ptr _2:%.*, i64 1) + // CHECK-NEXT: ret void + // + void xvstx(v32i8 _1, void *_2) { return __builtin_lasx_xvstx(_1, _2, 1); } + // CHECK-LABEL: @xvextl_qu_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvextl.qu.du(<4 x i64> _1:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvextl.qu.du(<4 x i64> _1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4u64 xvextl_qu_du(v4u64 _1) { return __builtin_lasx_xvextl_qu_du(_1); } + // CHECK-LABEL: @xvinsgr2vr_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w(<8 x i32> _1:%.*, i32 1, i32 1) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvinsgr2vr.w(<8 x i32> _1, i32 1, i32 1) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvinsgr2vr_w(v8i32 _1) { return __builtin_lasx_xvinsgr2vr_w(_1, 1, 1); } + // CHECK-LABEL: @xvinsgr2vr_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64> _1:%.*, i64 1, i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvinsgr2vr.d(<4 x i64> _1, i64 1, i32 1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvinsgr2vr_d(v4i64 _1) { return __builtin_lasx_xvinsgr2vr_d(_1, 1, 1); } + // CHECK-LABEL: @xvreplve0_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve0.b(<32 x i8> _1:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve0.b(<32 x i8> _1) ++// CHECK-NEXT: store <32 x i8> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvreplve0_b(v32i8 _1) { return __builtin_lasx_xvreplve0_b(_1); } + // CHECK-LABEL: @xvreplve0_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvreplve0.h(<16 x i16> _1:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvreplve0.h(<16 x i16> _1) ++// CHECK-NEXT: store <16 x i16> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvreplve0_h(v16i16 _1) { return __builtin_lasx_xvreplve0_h(_1); } + // CHECK-LABEL: @xvreplve0_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvreplve0.w(<8 x i32> _1:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvreplve0.w(<8 x i32> _1) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvreplve0_w(v8i32 _1) { return __builtin_lasx_xvreplve0_w(_1); } 
+ // CHECK-LABEL: @xvreplve0_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvreplve0.d(<4 x i64> _1:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvreplve0.d(<4 x i64> _1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvreplve0_d(v4i64 _1) { return __builtin_lasx_xvreplve0_d(_1); } + // CHECK-LABEL: @xvreplve0_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve0.q(<32 x i8> _1:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvreplve0.q(<32 x i8> _1) ++// CHECK-NEXT: store <32 x i8> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvreplve0_q(v32i8 _1) { return __builtin_lasx_xvreplve0_q(_1); } + // CHECK-LABEL: @vext2xv_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.vext2xv.h.b(<32 x i8> _1:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i16> @llvm.loongarch.lasx.vext2xv.h.b(<32 x i8> _1) ++// CHECK-NEXT: store <16 x i16> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 vext2xv_h_b(v32i8 _1) { return __builtin_lasx_vext2xv_h_b(_1); } + // CHECK-LABEL: @vext2xv_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.h(<16 x i16> _1:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.h(<16 x i16> _1) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 vext2xv_w_h(v16i16 _1) { return __builtin_lasx_vext2xv_w_h(_1); } + // CHECK-LABEL: @vext2xv_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.w(<8 x i32> _1:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.w(<8 x i32> _1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 vext2xv_d_w(v8i32 _1) { return __builtin_lasx_vext2xv_d_w(_1); } + // CHECK-LABEL: @vext2xv_w_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.b(<32 x i8> _1:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.w.b(<32 x i8> _1) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 vext2xv_w_b(v32i8 _1) { return __builtin_lasx_vext2xv_w_b(_1); } + // CHECK-LABEL: @vext2xv_d_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.h(<16 x i16> _1:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, 
align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.h(<16 x i16> _1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 vext2xv_d_h(v16i16 _1) { return __builtin_lasx_vext2xv_d_h(_1); } + // CHECK-LABEL: @vext2xv_d_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.b(<32 x i8> _1:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.d.b(<32 x i8> _1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 vext2xv_d_b(v32i8 _1) { return __builtin_lasx_vext2xv_d_b(_1); } + // CHECK-LABEL: @vext2xv_hu_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.vext2xv.hu.bu(<32 x i8> _1:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i16> @llvm.loongarch.lasx.vext2xv.hu.bu(<32 x i8> _1) ++// CHECK-NEXT: store <16 x i16> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 vext2xv_hu_bu(v32i8 _1) { return __builtin_lasx_vext2xv_hu_bu(_1); } + // CHECK-LABEL: @vext2xv_wu_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.hu(<16 x i16> _1:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.hu(<16 x i16> _1) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 vext2xv_wu_hu(v16i16 _1) { return __builtin_lasx_vext2xv_wu_hu(_1); } + // CHECK-LABEL: @vext2xv_du_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.wu(<8 x i32> _1:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.wu(<8 x i32> _1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 vext2xv_du_wu(v8i32 _1) { return __builtin_lasx_vext2xv_du_wu(_1); } + // CHECK-LABEL: @vext2xv_wu_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.bu(<32 x i8> _1:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.vext2xv.wu.bu(<32 x i8> _1) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 vext2xv_wu_bu(v32i8 _1) { return __builtin_lasx_vext2xv_wu_bu(_1); } + // CHECK-LABEL: @vext2xv_du_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.hu(<16 x i16> _1:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.hu(<16 x i16> _1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 
++// CHECK-NEXT: ret void + // + v4i64 vext2xv_du_hu(v16i16 _1) { return __builtin_lasx_vext2xv_du_hu(_1); } + // CHECK-LABEL: @vext2xv_du_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.bu(<32 x i8> _1:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.vext2xv.du.bu(<32 x i8> _1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 vext2xv_du_bu(v32i8 _1) { return __builtin_lasx_vext2xv_du_bu(_1); } + // CHECK-LABEL: @xvpermi_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvpermi.q(<32 x i8> _1:%.*, <32 x i8> _2:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvpermi.q(<32 x i8> _1, <32 x i8> _2, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvpermi_q(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvpermi_q(_1, _2, 1); } + // CHECK-LABEL: @xvpermi_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvpermi.d(<4 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvpermi.d(<4 x i64> _1, i32 1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvpermi_d(v4i64 _1) { return __builtin_lasx_xvpermi_d(_1, 1); } + // CHECK-LABEL: @xvperm_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvperm.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvperm.w(<8 x i32> _1, <8 x i32> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvperm_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvperm_w(_1, _2); } + // CHECK-LABEL: @xvldrepl_b( + // CHECK-NEXT: entry: + // CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvldrepl.b(ptr _1:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: store <32 x i8> TMP0, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvldrepl_b(void *_1) { return __builtin_lasx_xvldrepl_b(_1, 1); } + // CHECK-LABEL: @xvldrepl_h( + // CHECK-NEXT: entry: + // CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvldrepl.h(ptr _1:%.*, i32 2) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: store <16 x i16> TMP0, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvldrepl_h(void *_1) { return __builtin_lasx_xvldrepl_h(_1, 2); } + // CHECK-LABEL: @xvldrepl_w( + // CHECK-NEXT: entry: + // CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvldrepl.w(ptr _1:%.*, i32 4) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: store <8 x i32> TMP0, ptr 
AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvldrepl_w(void *_1) { return __builtin_lasx_xvldrepl_w(_1, 4); } + // CHECK-LABEL: @xvldrepl_d( + // CHECK-NEXT: entry: + // CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvldrepl.d(ptr _1:%.*, i32 8) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: store <4 x i64> TMP0, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvldrepl_d(void *_1) { return __builtin_lasx_xvldrepl_d(_1, 8); } + // CHECK-LABEL: @xvpickve2gr_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lasx.xvpickve2gr.w(<8 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret i32 TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call i32 @llvm.loongarch.lasx.xvpickve2gr.w(<8 x i32> _1, i32 1) ++// CHECK-NEXT: ret i32 TMP1 + // + int xvpickve2gr_w(v8i32 _1) { return __builtin_lasx_xvpickve2gr_w(_1, 1); } + // CHECK-LABEL: @xvpickve2gr_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lasx.xvpickve2gr.wu(<8 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret i32 TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call i32 @llvm.loongarch.lasx.xvpickve2gr.wu(<8 x i32> _1, i32 1) ++// CHECK-NEXT: ret i32 TMP1 + // + unsigned int xvpickve2gr_wu(v8i32 _1) { return __builtin_lasx_xvpickve2gr_wu(_1, 1); } + // CHECK-LABEL: @xvpickve2gr_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret i64 TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call i64 @llvm.loongarch.lasx.xvpickve2gr.d(<4 x i64> _1, i32 1) ++// CHECK-NEXT: ret i64 TMP1 + // + long xvpickve2gr_d(v4i64 _1) { return __builtin_lasx_xvpickve2gr_d(_1, 1); } + // CHECK-LABEL: @xvpickve2gr_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret i64 TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call i64 @llvm.loongarch.lasx.xvpickve2gr.du(<4 x i64> _1, i32 1) ++// CHECK-NEXT: ret i64 TMP1 + // + unsigned long int xvpickve2gr_du(v4i64 _1) { return __builtin_lasx_xvpickve2gr_du(_1, 1); } + // CHECK-LABEL: @xvaddwev_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvaddwev_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvaddwev_q_d(_1, _2); } + // CHECK-LABEL: @xvaddwev_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 
x i64> @llvm.loongarch.lasx.xvaddwev.d.w(<8 x i32> _1, <8 x i32> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvaddwev_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvaddwev_d_w(_1, _2); } + // CHECK-LABEL: @xvaddwev_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.h(<16 x i16> _1, <16 x i16> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvaddwev_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvaddwev_w_h(_1, _2); } + // CHECK-LABEL: @xvaddwev_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.b(<32 x i8> _1, <32 x i8> _2) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvaddwev_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvaddwev_h_b(_1, _2); } + // CHECK-LABEL: @xvaddwev_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvaddwev_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvaddwev_q_du(_1, _2); } + // CHECK-LABEL: @xvaddwev_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu(<8 x i32> _1, <8 x i32> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvaddwev_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvaddwev_d_wu(_1, _2); } + // CHECK-LABEL: @xvaddwev_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu(<16 x i16> _1, <16 x i16> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, 
align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvaddwev_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvaddwev_w_hu(_1, _2); } + // CHECK-LABEL: @xvaddwev_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu(<32 x i8> _1, <32 x i8> _2) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvaddwev_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvaddwev_h_bu(_1, _2); } + // CHECK-LABEL: @xvsubwev_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvsubwev_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsubwev_q_d(_1, _2); } + // CHECK-LABEL: @xvsubwev_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.w(<8 x i32> _1, <8 x i32> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvsubwev_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsubwev_d_w(_1, _2); } + // CHECK-LABEL: @xvsubwev_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.h(<16 x i16> _1, <16 x i16> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvsubwev_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsubwev_w_h(_1, _2); } + // CHECK-LABEL: @xvsubwev_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.b(<32 x i8> _1, <32 x i8> _2) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvsubwev_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsubwev_h_b(_1, 
_2); } + // CHECK-LABEL: @xvsubwev_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.q.du(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvsubwev_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvsubwev_q_du(_1, _2); } + // CHECK-LABEL: @xvsubwev_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.wu(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwev.d.wu(<8 x i32> _1, <8 x i32> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvsubwev_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvsubwev_d_wu(_1, _2); } + // CHECK-LABEL: @xvsubwev_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.hu(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwev.w.hu(<16 x i16> _1, <16 x i16> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvsubwev_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvsubwev_w_hu(_1, _2); } + // CHECK-LABEL: @xvsubwev_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.bu(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwev.h.bu(<32 x i8> _1, <32 x i8> _2) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvsubwev_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvsubwev_h_bu(_1, _2); } + // CHECK-LABEL: @xvmulwev_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvmulwev_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmulwev_q_d(_1, _2); } + // CHECK-LABEL: @xvmulwev_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> 
@llvm.loongarch.lasx.xvmulwev.d.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.w(<8 x i32> _1, <8 x i32> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvmulwev_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmulwev_d_w(_1, _2); } + // CHECK-LABEL: @xvmulwev_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.h(<16 x i16> _1, <16 x i16> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvmulwev_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmulwev_w_h(_1, _2); } + // CHECK-LABEL: @xvmulwev_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.b(<32 x i8> _1, <32 x i8> _2) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvmulwev_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmulwev_h_b(_1, _2); } + // CHECK-LABEL: @xvmulwev_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvmulwev_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmulwev_q_du(_1, _2); } + // CHECK-LABEL: @xvmulwev_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu(<8 x i32> _1, <8 x i32> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvmulwev_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmulwev_d_wu(_1, _2); } + // CHECK-LABEL: @xvmulwev_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load 
<16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu(<16 x i16> _1, <16 x i16> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvmulwev_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmulwev_w_hu(_1, _2); } + // CHECK-LABEL: @xvmulwev_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu(<32 x i8> _1, <32 x i8> _2) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvmulwev_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmulwev_h_bu(_1, _2); } + // CHECK-LABEL: @xvaddwod_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvaddwod_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvaddwod_q_d(_1, _2); } + // CHECK-LABEL: @xvaddwod_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.w(<8 x i32> _1, <8 x i32> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvaddwod_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvaddwod_d_w(_1, _2); } + // CHECK-LABEL: @xvaddwod_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.h(<16 x i16> _1, <16 x i16> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvaddwod_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvaddwod_w_h(_1, _2); } + // CHECK-LABEL: @xvaddwod_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// 
CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.b(<32 x i8> _1, <32 x i8> _2) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvaddwod_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvaddwod_h_b(_1, _2); } + // CHECK-LABEL: @xvaddwod_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvaddwod_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvaddwod_q_du(_1, _2); } + // CHECK-LABEL: @xvaddwod_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu(<8 x i32> _1, <8 x i32> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvaddwod_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvaddwod_d_wu(_1, _2); } + // CHECK-LABEL: @xvaddwod_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu(<16 x i16> _1, <16 x i16> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvaddwod_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvaddwod_w_hu(_1, _2); } + // CHECK-LABEL: @xvaddwod_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu(<32 x i8> _1, <32 x i8> _2) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvaddwod_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvaddwod_h_bu(_1, _2); } + // CHECK-LABEL: @xvsubwod_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store 
<4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvsubwod_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsubwod_q_d(_1, _2); } + // CHECK-LABEL: @xvsubwod_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.w(<8 x i32> _1, <8 x i32> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvsubwod_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsubwod_d_w(_1, _2); } + // CHECK-LABEL: @xvsubwod_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.h(<16 x i16> _1, <16 x i16> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvsubwod_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsubwod_w_h(_1, _2); } + // CHECK-LABEL: @xvsubwod_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.b(<32 x i8> _1, <32 x i8> _2) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvsubwod_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsubwod_h_b(_1, _2); } + // CHECK-LABEL: @xvsubwod_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.q.du(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvsubwod_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvsubwod_q_du(_1, _2); } + // CHECK-LABEL: @xvsubwod_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.wu(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsubwod.d.wu(<8 x i32> _1, <8 x i32> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvsubwod_d_wu(v8u32 _1, v8u32 _2) { return 
__builtin_lasx_xvsubwod_d_wu(_1, _2); } + // CHECK-LABEL: @xvsubwod_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.hu(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsubwod.w.hu(<16 x i16> _1, <16 x i16> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvsubwod_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvsubwod_w_hu(_1, _2); } + // CHECK-LABEL: @xvsubwod_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.bu(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsubwod.h.bu(<32 x i8> _1, <32 x i8> _2) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvsubwod_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvsubwod_h_bu(_1, _2); } + // CHECK-LABEL: @xvmulwod_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvmulwod_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvmulwod_q_d(_1, _2); } + // CHECK-LABEL: @xvmulwod_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.w(<8 x i32> _1, <8 x i32> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvmulwod_d_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvmulwod_d_w(_1, _2); } + // CHECK-LABEL: @xvmulwod_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.h(<16 x i16> _1, <16 x i16> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvmulwod_w_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvmulwod_w_h(_1, _2); } + // CHECK-LABEL: @xvmulwod_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail 
call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.b(<32 x i8> _1, <32 x i8> _2) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvmulwod_h_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvmulwod_h_b(_1, _2); } + // CHECK-LABEL: @xvmulwod_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvmulwod_q_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvmulwod_q_du(_1, _2); } + // CHECK-LABEL: @xvmulwod_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu(<8 x i32> _1, <8 x i32> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvmulwod_d_wu(v8u32 _1, v8u32 _2) { return __builtin_lasx_xvmulwod_d_wu(_1, _2); } + // CHECK-LABEL: @xvmulwod_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu(<16 x i16> _1, <16 x i16> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvmulwod_w_hu(v16u16 _1, v16u16 _2) { return __builtin_lasx_xvmulwod_w_hu(_1, _2); } + // CHECK-LABEL: @xvmulwod_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu(<32 x i8> _1, <32 x i8> _2) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvmulwod_h_bu(v32u8 _1, v32u8 _2) { return __builtin_lasx_xvmulwod_h_bu(_1, _2); } + // CHECK-LABEL: @xvaddwev_d_wu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 
++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.d.wu.w(<8 x i32> _1, <8 x i32> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvaddwev_d_wu_w(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvaddwev_d_wu_w(_1, _2); } + // CHECK-LABEL: @xvaddwev_w_hu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwev.w.hu.h(<16 x i16> _1, <16 x i16> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvaddwev_w_hu_h(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvaddwev_w_hu_h(_1, _2); } + // CHECK-LABEL: @xvaddwev_h_bu_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwev.h.bu.b(<32 x i8> _1, <32 x i8> _2) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvaddwev_h_bu_b(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvaddwev_h_bu_b(_1, _2); } + // CHECK-LABEL: @xvmulwev_d_wu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.d.wu.w(<8 x i32> _1, <8 x i32> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvmulwev_d_wu_w(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvmulwev_d_wu_w(_1, _2); } + // CHECK-LABEL: @xvmulwev_w_hu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwev.w.hu.h(<16 x i16> _1, <16 x i16> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvmulwev_w_hu_h(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvmulwev_w_hu_h(_1, _2); } + // CHECK-LABEL: @xvmulwev_h_bu_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, 
!tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwev.h.bu.b(<32 x i8> _1, <32 x i8> _2) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvmulwev_h_bu_b(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvmulwev_h_bu_b(_1, _2); } + // CHECK-LABEL: @xvaddwod_d_wu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.d.wu.w(<8 x i32> _1, <8 x i32> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvaddwod_d_wu_w(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvaddwod_d_wu_w(_1, _2); } + // CHECK-LABEL: @xvaddwod_w_hu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvaddwod.w.hu.h(<16 x i16> _1, <16 x i16> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvaddwod_w_hu_h(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvaddwod_w_hu_h(_1, _2); } + // CHECK-LABEL: @xvaddwod_h_bu_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvaddwod.h.bu.b(<32 x i8> _1, <32 x i8> _2) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvaddwod_h_bu_b(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvaddwod_h_bu_b(_1, _2); } + // CHECK-LABEL: @xvmulwod_d_wu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu.w(<8 x i32> _1:%.*, <8 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.d.wu.w(<8 x i32> _1, <8 x i32> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvmulwod_d_wu_w(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvmulwod_d_wu_w(_1, _2); } + // CHECK-LABEL: @xvmulwod_w_hu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, 
align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmulwod.w.hu.h(<16 x i16> _1, <16 x i16> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvmulwod_w_hu_h(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvmulwod_w_hu_h(_1, _2); } + // CHECK-LABEL: @xvmulwod_h_bu_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmulwod.h.bu.b(<32 x i8> _1, <32 x i8> _2) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvmulwod_h_bu_b(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvmulwod_h_bu_b(_1, _2); } + // CHECK-LABEL: @xvhaddw_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.q.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.q.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvhaddw_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvhaddw_q_d(_1, _2); } + // CHECK-LABEL: @xvhaddw_qu_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.qu.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvhaddw.qu.du(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4u64 xvhaddw_qu_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvhaddw_qu_du(_1, _2); } + // CHECK-LABEL: @xvhsubw_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.q.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.q.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvhsubw_q_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvhsubw_q_d(_1, _2); } + // CHECK-LABEL: @xvhsubw_qu_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.qu.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvhsubw.qu.du(<4 x i64> _1, <4 x i64> _2) 
++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4u64 xvhsubw_qu_du(v4u64 _1, v4u64 _2) { return __builtin_lasx_xvhsubw_qu_du(_1, _2); } + // CHECK-LABEL: @xvmaddwev_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*, <4 x i64> _3:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _3:%.* = load <4 x i64>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.d(<4 x i64> _1, <4 x i64> _2, <4 x i64> _3) ++// CHECK-NEXT: store <4 x i64> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvmaddwev_q_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __builtin_lasx_xvmaddwev_q_d(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwev_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.w(<4 x i64> _1:%.*, <8 x i32> _2:%.*, <8 x i32> _3:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _3:%.* = load <8 x i32>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.w(<4 x i64> _1, <8 x i32> _2, <8 x i32> _3) ++// CHECK-NEXT: store <4 x i64> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvmaddwev_d_w(v4i64 _1, v8i32 _2, v8i32 _3) { return __builtin_lasx_xvmaddwev_d_w(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwev_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.h(<8 x i32> _1:%.*, <16 x i16> _2:%.*, <16 x i16> _3:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _3:%.* = load <16 x i16>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.h(<8 x i32> _1, <16 x i16> _2, <16 x i16> _3) ++// CHECK-NEXT: store <8 x i32> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvmaddwev_w_h(v8i32 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvmaddwev_w_h(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwev_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.b(<16 x i16> _1:%.*, <32 x i8> _2:%.*, <32 x i8> _3:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _3:%.* = load <32 x i8>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.b(<16 x i16> _1, <32 x i8> _2, <32 x i8> _3) ++// CHECK-NEXT: store <16 x i16> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvmaddwev_h_b(v16i16 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvmaddwev_h_b(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwev_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> 
@llvm.loongarch.lasx.xvmaddwev.q.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*, <4 x i64> _3:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _3:%.* = load <4 x i64>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du(<4 x i64> _1, <4 x i64> _2, <4 x i64> _3) ++// CHECK-NEXT: store <4 x i64> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4u64 xvmaddwev_q_du(v4u64 _1, v4u64 _2, v4u64 _3) { return __builtin_lasx_xvmaddwev_q_du(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwev_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu(<4 x i64> _1:%.*, <8 x i32> _2:%.*, <8 x i32> _3:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _3:%.* = load <8 x i32>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu(<4 x i64> _1, <8 x i32> _2, <8 x i32> _3) ++// CHECK-NEXT: store <4 x i64> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4u64 xvmaddwev_d_wu(v4u64 _1, v8u32 _2, v8u32 _3) { return __builtin_lasx_xvmaddwev_d_wu(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwev_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu(<8 x i32> _1:%.*, <16 x i16> _2:%.*, <16 x i16> _3:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _3:%.* = load <16 x i16>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu(<8 x i32> _1, <16 x i16> _2, <16 x i16> _3) ++// CHECK-NEXT: store <8 x i32> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8u32 xvmaddwev_w_hu(v8u32 _1, v16u16 _2, v16u16 _3) { return __builtin_lasx_xvmaddwev_w_hu(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwev_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu(<16 x i16> _1:%.*, <32 x i8> _2:%.*, <32 x i8> _3:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _3:%.* = load <32 x i8>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu(<16 x i16> _1, <32 x i8> _2, <32 x i8> _3) ++// CHECK-NEXT: store <16 x i16> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16u16 xvmaddwev_h_bu(v16u16 _1, v32u8 _2, v32u8 _3) { return __builtin_lasx_xvmaddwev_h_bu(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwod_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*, <4 x i64> _3:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// 
CHECK-NEXT: _3:%.* = load <4 x i64>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.d(<4 x i64> _1, <4 x i64> _2, <4 x i64> _3) ++// CHECK-NEXT: store <4 x i64> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvmaddwod_q_d(v4i64 _1, v4i64 _2, v4i64 _3) { return __builtin_lasx_xvmaddwod_q_d(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwod_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.w(<4 x i64> _1:%.*, <8 x i32> _2:%.*, <8 x i32> _3:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _3:%.* = load <8 x i32>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.w(<4 x i64> _1, <8 x i32> _2, <8 x i32> _3) ++// CHECK-NEXT: store <4 x i64> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvmaddwod_d_w(v4i64 _1, v8i32 _2, v8i32 _3) { return __builtin_lasx_xvmaddwod_d_w(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwod_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.h(<8 x i32> _1:%.*, <16 x i16> _2:%.*, <16 x i16> _3:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _3:%.* = load <16 x i16>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.h(<8 x i32> _1, <16 x i16> _2, <16 x i16> _3) ++// CHECK-NEXT: store <8 x i32> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvmaddwod_w_h(v8i32 _1, v16i16 _2, v16i16 _3) { return __builtin_lasx_xvmaddwod_w_h(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwod_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.b(<16 x i16> _1:%.*, <32 x i8> _2:%.*, <32 x i8> _3:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _3:%.* = load <32 x i8>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.b(<16 x i16> _1, <32 x i8> _2, <32 x i8> _3) ++// CHECK-NEXT: store <16 x i16> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvmaddwod_h_b(v16i16 _1, v32i8 _2, v32i8 _3) { return __builtin_lasx_xvmaddwod_h_b(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwod_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du(<4 x i64> _1:%.*, <4 x i64> _2:%.*, <4 x i64> _3:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _3:%.* = load <4 x i64>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du(<4 x i64> _1, <4 x i64> _2, <4 x i64> _3) ++// CHECK-NEXT: store <4 x i64> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + 
// + v4u64 xvmaddwod_q_du(v4u64 _1, v4u64 _2, v4u64 _3) { return __builtin_lasx_xvmaddwod_q_du(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwod_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu(<4 x i64> _1:%.*, <8 x i32> _2:%.*, <8 x i32> _3:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _3:%.* = load <8 x i32>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu(<4 x i64> _1, <8 x i32> _2, <8 x i32> _3) ++// CHECK-NEXT: store <4 x i64> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4u64 xvmaddwod_d_wu(v4u64 _1, v8u32 _2, v8u32 _3) { return __builtin_lasx_xvmaddwod_d_wu(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwod_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu(<8 x i32> _1:%.*, <16 x i16> _2:%.*, <16 x i16> _3:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _3:%.* = load <16 x i16>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu(<8 x i32> _1, <16 x i16> _2, <16 x i16> _3) ++// CHECK-NEXT: store <8 x i32> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8u32 xvmaddwod_w_hu(v8u32 _1, v16u16 _2, v16u16 _3) { return __builtin_lasx_xvmaddwod_w_hu(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwod_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu(<16 x i16> _1:%.*, <32 x i8> _2:%.*, <32 x i8> _3:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _3:%.* = load <32 x i8>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu(<16 x i16> _1, <32 x i8> _2, <32 x i8> _3) ++// CHECK-NEXT: store <16 x i16> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16u16 xvmaddwod_h_bu(v16u16 _1, v32u8 _2, v32u8 _3) { return __builtin_lasx_xvmaddwod_h_bu(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwev_q_du_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*, <4 x i64> _3:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _3:%.* = load <4 x i64>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.q.du.d(<4 x i64> _1, <4 x i64> _2, <4 x i64> _3) ++// CHECK-NEXT: store <4 x i64> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvmaddwev_q_du_d(v4i64 _1, v4u64 _2, v4i64 _3) { return __builtin_lasx_xvmaddwev_q_du_d(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwev_d_wu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu.w(<4 x i64> _1:%.*, <8 x 
i32> _2:%.*, <8 x i32> _3:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _3:%.* = load <8 x i32>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwev.d.wu.w(<4 x i64> _1, <8 x i32> _2, <8 x i32> _3) ++// CHECK-NEXT: store <4 x i64> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvmaddwev_d_wu_w(v4i64 _1, v8u32 _2, v8i32 _3) { return __builtin_lasx_xvmaddwev_d_wu_w(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwev_w_hu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu.h(<8 x i32> _1:%.*, <16 x i16> _2:%.*, <16 x i16> _3:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _3:%.* = load <16 x i16>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwev.w.hu.h(<8 x i32> _1, <16 x i16> _2, <16 x i16> _3) ++// CHECK-NEXT: store <8 x i32> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvmaddwev_w_hu_h(v8i32 _1, v16u16 _2, v16i16 _3) { return __builtin_lasx_xvmaddwev_w_hu_h(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwev_h_bu_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu.b(<16 x i16> _1:%.*, <32 x i8> _2:%.*, <32 x i8> _3:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _3:%.* = load <32 x i8>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwev.h.bu.b(<16 x i16> _1, <32 x i8> _2, <32 x i8> _3) ++// CHECK-NEXT: store <16 x i16> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvmaddwev_h_bu_b(v16i16 _1, v32u8 _2, v32i8 _3) { return __builtin_lasx_xvmaddwev_h_bu_b(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwod_q_du_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*, <4 x i64> _3:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _3:%.* = load <4 x i64>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.q.du.d(<4 x i64> _1, <4 x i64> _2, <4 x i64> _3) ++// CHECK-NEXT: store <4 x i64> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvmaddwod_q_du_d(v4i64 _1, v4u64 _2, v4i64 _3) { return __builtin_lasx_xvmaddwod_q_du_d(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwod_d_wu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu.w(<4 x i64> _1:%.*, <8 x i32> _2:%.*, <8 x i32> _3:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _3:%.* = 
load <8 x i32>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmaddwod.d.wu.w(<4 x i64> _1, <8 x i32> _2, <8 x i32> _3) ++// CHECK-NEXT: store <4 x i64> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvmaddwod_d_wu_w(v4i64 _1, v8u32 _2, v8i32 _3) { return __builtin_lasx_xvmaddwod_d_wu_w(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwod_w_hu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu.h(<8 x i32> _1:%.*, <16 x i16> _2:%.*, <16 x i16> _3:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _3:%.* = load <16 x i16>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvmaddwod.w.hu.h(<8 x i32> _1, <16 x i16> _2, <16 x i16> _3) ++// CHECK-NEXT: store <8 x i32> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvmaddwod_w_hu_h(v8i32 _1, v16u16 _2, v16i16 _3) { return __builtin_lasx_xvmaddwod_w_hu_h(_1, _2, _3); } + // CHECK-LABEL: @xvmaddwod_h_bu_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu.b(<16 x i16> _1:%.*, <32 x i8> _2:%.*, <32 x i8> _3:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _3:%.* = load <32 x i8>, ptr TMP2:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP3:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvmaddwod.h.bu.b(<16 x i16> _1, <32 x i8> _2, <32 x i8> _3) ++// CHECK-NEXT: store <16 x i16> TMP3, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvmaddwod_h_bu_b(v16i16 _1, v32u8 _2, v32i8 _3) { return __builtin_lasx_xvmaddwod_h_bu_b(_1, _2, _3); } + // CHECK-LABEL: @xvrotr_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvrotr.b(<32 x i8> _1:%.*, <32 x i8> _2:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvrotr.b(<32 x i8> _1, <32 x i8> _2) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvrotr_b(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvrotr_b(_1, _2); } + // CHECK-LABEL: @xvrotr_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvrotr.h(<16 x i16> _1:%.*, <16 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvrotr.h(<16 x i16> _1, <16 x i16> _2) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvrotr_h(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvrotr_h(_1, _2); } + // CHECK-LABEL: @xvrotr_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvrotr.w(<8 x i32> _1:%.*, <8 x i32> 
_2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvrotr.w(<8 x i32> _1, <8 x i32> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvrotr_w(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvrotr_w(_1, _2); } + // CHECK-LABEL: @xvrotr_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvrotr.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvrotr.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvrotr_d(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvrotr_d(_1, _2); } + // CHECK-LABEL: @xvadd_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvadd.q(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvadd.q(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvadd_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvadd_q(_1, _2); } + // CHECK-LABEL: @xvsub_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsub.q(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsub.q(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvsub_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsub_q(_1, _2); } + // CHECK-LABEL: @xvaddwev_q_du_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwev.q.du.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvaddwev_q_du_d(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvaddwev_q_du_d(_1, _2); } + // CHECK-LABEL: @xvaddwod_q_du_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* 
= tail call <4 x i64> @llvm.loongarch.lasx.xvaddwod.q.du.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvaddwod_q_du_d(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvaddwod_q_du_d(_1, _2); } + // CHECK-LABEL: @xvmulwev_q_du_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwev.q.du.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvmulwev_q_du_d(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvmulwev_q_du_d(_1, _2); } + // CHECK-LABEL: @xvmulwod_q_du_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du.d(<4 x i64> _1:%.*, <4 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvmulwod.q.du.d(<4 x i64> _1, <4 x i64> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvmulwod_q_du_d(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvmulwod_q_du_d(_1, _2); } + // CHECK-LABEL: @xvmskgez_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmskgez.b(<32 x i8> _1:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmskgez.b(<32 x i8> _1) ++// CHECK-NEXT: store <32 x i8> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvmskgez_b(v32i8 _1) { return __builtin_lasx_xvmskgez_b(_1); } + // CHECK-LABEL: @xvmsknz_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmsknz.b(<32 x i8> _1:%.*) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvmsknz.b(<32 x i8> _1) ++// CHECK-NEXT: store <32 x i8> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvmsknz_b(v32i8 _1) { return __builtin_lasx_xvmsknz_b(_1); } + // CHECK-LABEL: @xvexth_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvexth.h.b(<32 x i8> _1:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvexth.h.b(<32 x i8> _1) ++// CHECK-NEXT: store <16 x i16> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvexth_h_b(v32i8 _1) { return __builtin_lasx_xvexth_h_b(_1); } + // CHECK-LABEL: @xvexth_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvexth.w.h(<16 x i16> _1:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr 
TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvexth.w.h(<16 x i16> _1) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvexth_w_h(v16i16 _1) { return __builtin_lasx_xvexth_w_h(_1); } + // CHECK-LABEL: @xvexth_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.d.w(<8 x i32> _1:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.d.w(<8 x i32> _1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvexth_d_w(v8i32 _1) { return __builtin_lasx_xvexth_d_w(_1); } + // CHECK-LABEL: @xvexth_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.q.d(<4 x i64> _1:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.q.d(<4 x i64> _1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvexth_q_d(v4i64 _1) { return __builtin_lasx_xvexth_q_d(_1); } + // CHECK-LABEL: @xvexth_hu_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvexth.hu.bu(<32 x i8> _1:%.*) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvexth.hu.bu(<32 x i8> _1) ++// CHECK-NEXT: store <16 x i16> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16u16 xvexth_hu_bu(v32u8 _1) { return __builtin_lasx_xvexth_hu_bu(_1); } + // CHECK-LABEL: @xvexth_wu_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvexth.wu.hu(<16 x i16> _1:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvexth.wu.hu(<16 x i16> _1) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8u32 xvexth_wu_hu(v16u16 _1) { return __builtin_lasx_xvexth_wu_hu(_1); } + // CHECK-LABEL: @xvexth_du_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.du.wu(<8 x i32> _1:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.du.wu(<8 x i32> _1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4u64 xvexth_du_wu(v8u32 _1) { return __builtin_lasx_xvexth_du_wu(_1); } + // CHECK-LABEL: @xvexth_qu_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.qu.du(<4 x i64> _1:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvexth.qu.du(<4 x i64> _1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + 
v4u64 xvexth_qu_du(v4u64 _1) { return __builtin_lasx_xvexth_qu_du(_1); } + // CHECK-LABEL: @xvrotri_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvrotri.b(<32 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvrotri.b(<32 x i8> _1, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvrotri_b(v32i8 _1) { return __builtin_lasx_xvrotri_b(_1, 1); } + // CHECK-LABEL: @xvrotri_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvrotri.h(<16 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvrotri.h(<16 x i16> _1, i32 1) ++// CHECK-NEXT: store <16 x i16> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvrotri_h(v16i16 _1) { return __builtin_lasx_xvrotri_h(_1, 1); } + // CHECK-LABEL: @xvrotri_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvrotri.w(<8 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvrotri.w(<8 x i32> _1, i32 1) ++// CHECK-NEXT: store <8 x i32> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvrotri_w(v8i32 _1) { return __builtin_lasx_xvrotri_w(_1, 1); } + // CHECK-LABEL: @xvrotri_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvrotri.d(<4 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvrotri.d(<4 x i64> _1, i32 1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvrotri_d(v4i64 _1) { return __builtin_lasx_xvrotri_d(_1, 1); } + // CHECK-LABEL: @xvextl_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvextl.q.d(<4 x i64> _1:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvextl.q.d(<4 x i64> _1) ++// CHECK-NEXT: store <4 x i64> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvextl_q_d(v4i64 _1) { return __builtin_lasx_xvextl_q_d(_1); } + // CHECK-LABEL: @xvsrlni_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h(<32 x i8> _1:%.*, <32 x i8> _2:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlni.b.h(<32 x i8> _1, <32 x i8> _2, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvsrlni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrlni_b_h(_1, _2, 1); } + // 
CHECK-LABEL: @xvsrlni_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w(<16 x i16> _1:%.*, <16 x i16> _2:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlni.h.w(<16 x i16> _1, <16 x i16> _2, i32 1) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvsrlni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrlni_h_w(_1, _2, 1); } + // CHECK-LABEL: @xvsrlni_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d(<8 x i32> _1:%.*, <8 x i32> _2:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlni.w.d(<8 x i32> _1, <8 x i32> _2, i32 1) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvsrlni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrlni_w_d(_1, _2, 1); } + // CHECK-LABEL: @xvsrlni_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q(<4 x i64> _1:%.*, <4 x i64> _2:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlni.d.q(<4 x i64> _1, <4 x i64> _2, i32 1) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvsrlni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrlni_d_q(_1, _2, 1); } + // CHECK-LABEL: @xvsrlrni_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h(<32 x i8> _1:%.*, <32 x i8> _2:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsrlrni.b.h(<32 x i8> _1, <32 x i8> _2, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvsrlrni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrlrni_b_h(_1, _2, 1); } + // CHECK-LABEL: @xvsrlrni_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w(<16 x i16> _1:%.*, <16 x i16> _2:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsrlrni.h.w(<16 x i16> _1, <16 x i16> _2, i32 1) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvsrlrni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrlrni_h_w(_1, _2, 1); } + // CHECK-LABEL: @xvsrlrni_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: 
TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d(<8 x i32> _1:%.*, <8 x i32> _2:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsrlrni.w.d(<8 x i32> _1, <8 x i32> _2, i32 1) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvsrlrni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrlrni_w_d(_1, _2, 1); } + // CHECK-LABEL: @xvsrlrni_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q(<4 x i64> _1:%.*, <4 x i64> _2:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsrlrni.d.q(<4 x i64> _1, <4 x i64> _2, i32 1) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvsrlrni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrlrni_d_q(_1, _2, 1); } + // CHECK-LABEL: @xvssrlni_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8> _1:%.*, <32 x i8> _2:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlni.b.h(<32 x i8> _1, <32 x i8> _2, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvssrlni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvssrlni_b_h(_1, _2, 1); } + // CHECK-LABEL: @xvssrlni_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w(<16 x i16> _1:%.*, <16 x i16> _2:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlni.h.w(<16 x i16> _1, <16 x i16> _2, i32 1) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvssrlni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrlni_h_w(_1, _2, 1); } + // CHECK-LABEL: @xvssrlni_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d(<8 x i32> _1:%.*, <8 x i32> _2:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlni.w.d(<8 x i32> _1, <8 x i32> _2, i32 1) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvssrlni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrlni_w_d(_1, _2, 1); } + // CHECK-LABEL: @xvssrlni_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> 
@llvm.loongarch.lasx.xvssrlni.d.q(<4 x i64> _1:%.*, <4 x i64> _2:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlni.d.q(<4 x i64> _1, <4 x i64> _2, i32 1) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvssrlni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrlni_d_q(_1, _2, 1); } + // CHECK-LABEL: @xvssrlni_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h(<32 x i8> _1:%.*, <32 x i8> _2:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlni.bu.h(<32 x i8> _1, <32 x i8> _2, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32u8 xvssrlni_bu_h(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvssrlni_bu_h(_1, _2, 1); } + // CHECK-LABEL: @xvssrlni_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w(<16 x i16> _1:%.*, <16 x i16> _2:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlni.hu.w(<16 x i16> _1, <16 x i16> _2, i32 1) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16u16 xvssrlni_hu_w(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvssrlni_hu_w(_1, _2, 1); } + // CHECK-LABEL: @xvssrlni_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d(<8 x i32> _1:%.*, <8 x i32> _2:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlni.wu.d(<8 x i32> _1, <8 x i32> _2, i32 1) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8u32 xvssrlni_wu_d(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvssrlni_wu_d(_1, _2, 1); } + // CHECK-LABEL: @xvssrlni_du_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q(<4 x i64> _1:%.*, <4 x i64> _2:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlni.du.q(<4 x i64> _1, <4 x i64> _2, i32 1) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4u64 xvssrlni_du_q(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvssrlni_du_q(_1, _2, 1); } + // CHECK-LABEL: @xvssrlrni_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h(<32 x i8> 
_1:%.*, <32 x i8> _2:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.b.h(<32 x i8> _1, <32 x i8> _2, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvssrlrni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvssrlrni_b_h(_1, _2, 1); } + // CHECK-LABEL: @xvssrlrni_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w(<16 x i16> _1:%.*, <16 x i16> _2:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.h.w(<16 x i16> _1, <16 x i16> _2, i32 1) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvssrlrni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrlrni_h_w(_1, _2, 1); } + // CHECK-LABEL: @xvssrlrni_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d(<8 x i32> _1:%.*, <8 x i32> _2:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.w.d(<8 x i32> _1, <8 x i32> _2, i32 1) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvssrlrni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrlrni_w_d(_1, _2, 1); } + // CHECK-LABEL: @xvssrlrni_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.d.q(<4 x i64> _1:%.*, <4 x i64> _2:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.d.q(<4 x i64> _1, <4 x i64> _2, i32 1) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvssrlrni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrlrni_d_q(_1, _2, 1); } + // CHECK-LABEL: @xvssrlrni_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h(<32 x i8> _1:%.*, <32 x i8> _2:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssrlrni.bu.h(<32 x i8> _1, <32 x i8> _2, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32u8 xvssrlrni_bu_h(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvssrlrni_bu_h(_1, _2, 1); } + // CHECK-LABEL: @xvssrlrni_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w(<16 x i16> _1:%.*, <16 x i16> _2:%.*, i32 
1) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssrlrni.hu.w(<16 x i16> _1, <16 x i16> _2, i32 1) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16u16 xvssrlrni_hu_w(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvssrlrni_hu_w(_1, _2, 1); } + // CHECK-LABEL: @xvssrlrni_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d(<8 x i32> _1:%.*, <8 x i32> _2:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssrlrni.wu.d(<8 x i32> _1, <8 x i32> _2, i32 1) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8u32 xvssrlrni_wu_d(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvssrlrni_wu_d(_1, _2, 1); } + // CHECK-LABEL: @xvssrlrni_du_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q(<4 x i64> _1:%.*, <4 x i64> _2:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvssrlrni.du.q(<4 x i64> _1, <4 x i64> _2, i32 1) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4u64 xvssrlrni_du_q(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvssrlrni_du_q(_1, _2, 1); } + // CHECK-LABEL: @xvsrani_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h(<32 x i8> _1:%.*, <32 x i8> _2:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsrani.b.h(<32 x i8> _1, <32 x i8> _2, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvsrani_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrani_b_h(_1, _2, 1); } + // CHECK-LABEL: @xvsrani_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w(<16 x i16> _1:%.*, <16 x i16> _2:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsrani.h.w(<16 x i16> _1, <16 x i16> _2, i32 1) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvsrani_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrani_h_w(_1, _2, 1); } + // CHECK-LABEL: @xvsrani_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d(<8 x i32> _1:%.*, <8 x i32> _2:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i32> TMP0 
++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsrani.w.d(<8 x i32> _1, <8 x i32> _2, i32 1) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvsrani_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrani_w_d(_1, _2, 1); } + // CHECK-LABEL: @xvsrani_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q(<4 x i64> _1:%.*, <4 x i64> _2:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsrani.d.q(<4 x i64> _1, <4 x i64> _2, i32 1) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvsrani_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrani_d_q(_1, _2, 1); } + // CHECK-LABEL: @xvsrarni_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h(<32 x i8> _1:%.*, <32 x i8> _2:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvsrarni.b.h(<32 x i8> _1, <32 x i8> _2, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvsrarni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvsrarni_b_h(_1, _2, 1); } + // CHECK-LABEL: @xvsrarni_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w(<16 x i16> _1:%.*, <16 x i16> _2:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvsrarni.h.w(<16 x i16> _1, <16 x i16> _2, i32 1) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvsrarni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvsrarni_h_w(_1, _2, 1); } + // CHECK-LABEL: @xvsrarni_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d(<8 x i32> _1:%.*, <8 x i32> _2:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvsrarni.w.d(<8 x i32> _1, <8 x i32> _2, i32 1) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvsrarni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvsrarni_w_d(_1, _2, 1); } + // CHECK-LABEL: @xvsrarni_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q(<4 x i64> _1:%.*, <4 x i64> _2:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa 
TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvsrarni.d.q(<4 x i64> _1, <4 x i64> _2, i32 1) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvsrarni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvsrarni_d_q(_1, _2, 1); } + // CHECK-LABEL: @xvssrani_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h(<32 x i8> _1:%.*, <32 x i8> _2:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssrani.b.h(<32 x i8> _1, <32 x i8> _2, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvssrani_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvssrani_b_h(_1, _2, 1); } + // CHECK-LABEL: @xvssrani_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssrani.h.w(<16 x i16> _1:%.*, <16 x i16> _2:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssrani.h.w(<16 x i16> _1, <16 x i16> _2, i32 1) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvssrani_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrani_h_w(_1, _2, 1); } + // CHECK-LABEL: @xvssrani_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d(<8 x i32> _1:%.*, <8 x i32> _2:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssrani.w.d(<8 x i32> _1, <8 x i32> _2, i32 1) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvssrani_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrani_w_d(_1, _2, 1); } + // CHECK-LABEL: @xvssrani_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q(<4 x i64> _1:%.*, <4 x i64> _2:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvssrani.d.q(<4 x i64> _1, <4 x i64> _2, i32 1) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvssrani_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrani_d_q(_1, _2, 1); } + // CHECK-LABEL: @xvssrani_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h(<32 x i8> _1:%.*, <32 x i8> _2:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, 
align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssrani.bu.h(<32 x i8> _1, <32 x i8> _2, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32u8 xvssrani_bu_h(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvssrani_bu_h(_1, _2, 1); } + // CHECK-LABEL: @xvssrani_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w(<16 x i16> _1:%.*, <16 x i16> _2:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssrani.hu.w(<16 x i16> _1, <16 x i16> _2, i32 1) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16u16 xvssrani_hu_w(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvssrani_hu_w(_1, _2, 1); } + // CHECK-LABEL: @xvssrani_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d(<8 x i32> _1:%.*, <8 x i32> _2:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssrani.wu.d(<8 x i32> _1, <8 x i32> _2, i32 1) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8u32 xvssrani_wu_d(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvssrani_wu_d(_1, _2, 1); } + // CHECK-LABEL: @xvssrani_du_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvssrani.du.q(<4 x i64> _1:%.*, <4 x i64> _2:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvssrani.du.q(<4 x i64> _1, <4 x i64> _2, i32 1) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4u64 xvssrani_du_q(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvssrani_du_q(_1, _2, 1); } + // CHECK-LABEL: @xvssrarni_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h(<32 x i8> _1:%.*, <32 x i8> _2:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarni.b.h(<32 x i8> _1, <32 x i8> _2, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvssrarni_b_h(v32i8 _1, v32i8 _2) { return __builtin_lasx_xvssrarni_b_h(_1, _2, 1); } + // CHECK-LABEL: @xvssrarni_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w(<16 x i16> _1:%.*, <16 x i16> _2:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// 
CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarni.h.w(<16 x i16> _1, <16 x i16> _2, i32 1) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvssrarni_h_w(v16i16 _1, v16i16 _2) { return __builtin_lasx_xvssrarni_h_w(_1, _2, 1); } + // CHECK-LABEL: @xvssrarni_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarni.w.d(<8 x i32> _1:%.*, <8 x i32> _2:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarni.w.d(<8 x i32> _1, <8 x i32> _2, i32 1) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvssrarni_w_d(v8i32 _1, v8i32 _2) { return __builtin_lasx_xvssrarni_w_d(_1, _2, 1); } + // CHECK-LABEL: @xvssrarni_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q(<4 x i64> _1:%.*, <4 x i64> _2:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvssrarni.d.q(<4 x i64> _1, <4 x i64> _2, i32 1) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvssrarni_d_q(v4i64 _1, v4i64 _2) { return __builtin_lasx_xvssrarni_d_q(_1, _2, 1); } + // CHECK-LABEL: @xvssrarni_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h(<32 x i8> _1:%.*, <32 x i8> _2:%.*, i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <32 x i8>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvssrarni.bu.h(<32 x i8> _1, <32 x i8> _2, i32 1) ++// CHECK-NEXT: store <32 x i8> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32u8 xvssrarni_bu_h(v32u8 _1, v32i8 _2) { return __builtin_lasx_xvssrarni_bu_h(_1, _2, 1); } + // CHECK-LABEL: @xvssrarni_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w(<16 x i16> _1:%.*, <16 x i16> _2:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <16 x i16>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvssrarni.hu.w(<16 x i16> _1, <16 x i16> _2, i32 1) ++// CHECK-NEXT: store <16 x i16> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16u16 xvssrarni_hu_w(v16u16 _1, v16i16 _2) { return __builtin_lasx_xvssrarni_hu_w(_1, _2, 1); } + // CHECK-LABEL: @xvssrarni_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d(<8 x i32> _1:%.*, <8 x i32> _2:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x i32>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = 
tail call <8 x i32> @llvm.loongarch.lasx.xvssrarni.wu.d(<8 x i32> _1, <8 x i32> _2, i32 1) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8u32 xvssrarni_wu_d(v8u32 _1, v8i32 _2) { return __builtin_lasx_xvssrarni_wu_d(_1, _2, 1); } + // CHECK-LABEL: @xvssrarni_du_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q(<4 x i64> _1:%.*, <4 x i64> _2:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x i64>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvssrarni.du.q(<4 x i64> _1, <4 x i64> _2, i32 1) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4u64 xvssrarni_du_q(v4u64 _1, v4i64 _2) { return __builtin_lasx_xvssrarni_du_q(_1, _2, 1); } + // CHECK-LABEL: @xbnz_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lasx.xbnz.b(<32 x i8> _1:%.*) +-// CHECK-NEXT: ret i32 TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call i32 @llvm.loongarch.lasx.xbnz.b(<32 x i8> _1) ++// CHECK-NEXT: ret i32 TMP1 + // + int xbnz_b(v32u8 _1) { return __builtin_lasx_xbnz_b(_1); } + // CHECK-LABEL: @xbnz_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lasx.xbnz.d(<4 x i64> _1:%.*) +-// CHECK-NEXT: ret i32 TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call i32 @llvm.loongarch.lasx.xbnz.d(<4 x i64> _1) ++// CHECK-NEXT: ret i32 TMP1 + // + int xbnz_d(v4u64 _1) { return __builtin_lasx_xbnz_d(_1); } + // CHECK-LABEL: @xbnz_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lasx.xbnz.h(<16 x i16> _1:%.*) +-// CHECK-NEXT: ret i32 TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call i32 @llvm.loongarch.lasx.xbnz.h(<16 x i16> _1) ++// CHECK-NEXT: ret i32 TMP1 + // + int xbnz_h(v16u16 _1) { return __builtin_lasx_xbnz_h(_1); } + // CHECK-LABEL: @xbnz_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lasx.xbnz.v(<32 x i8> _1:%.*) +-// CHECK-NEXT: ret i32 TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call i32 @llvm.loongarch.lasx.xbnz.v(<32 x i8> _1) ++// CHECK-NEXT: ret i32 TMP1 + // + int xbnz_v(v32u8 _1) { return __builtin_lasx_xbnz_v(_1); } + // CHECK-LABEL: @xbnz_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lasx.xbnz.w(<8 x i32> _1:%.*) +-// CHECK-NEXT: ret i32 TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call i32 @llvm.loongarch.lasx.xbnz.w(<8 x i32> _1) ++// CHECK-NEXT: ret i32 TMP1 + // + int xbnz_w(v8u32 _1) { return __builtin_lasx_xbnz_w(_1); } + // CHECK-LABEL: @xbz_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lasx.xbz.b(<32 x i8> _1:%.*) +-// CHECK-NEXT: ret i32 TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call i32 @llvm.loongarch.lasx.xbz.b(<32 x i8> _1) ++// CHECK-NEXT: ret i32 TMP1 + // + int xbz_b(v32u8 _1) { return 
__builtin_lasx_xbz_b(_1); } + // CHECK-LABEL: @xbz_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lasx.xbz.d(<4 x i64> _1:%.*) +-// CHECK-NEXT: ret i32 TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x i64>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call i32 @llvm.loongarch.lasx.xbz.d(<4 x i64> _1) ++// CHECK-NEXT: ret i32 TMP1 + // + int xbz_d(v4u64 _1) { return __builtin_lasx_xbz_d(_1); } + // CHECK-LABEL: @xbz_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lasx.xbz.h(<16 x i16> _1:%.*) +-// CHECK-NEXT: ret i32 TMP0 ++// CHECK-NEXT: _1:%.* = load <16 x i16>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call i32 @llvm.loongarch.lasx.xbz.h(<16 x i16> _1) ++// CHECK-NEXT: ret i32 TMP1 + // + int xbz_h(v16u16 _1) { return __builtin_lasx_xbz_h(_1); } + // CHECK-LABEL: @xbz_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lasx.xbz.v(<32 x i8> _1:%.*) +-// CHECK-NEXT: ret i32 TMP0 ++// CHECK-NEXT: _1:%.* = load <32 x i8>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call i32 @llvm.loongarch.lasx.xbz.v(<32 x i8> _1) ++// CHECK-NEXT: ret i32 TMP1 + // + int xbz_v(v32u8 _1) { return __builtin_lasx_xbz_v(_1); } + // CHECK-LABEL: @xbz_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lasx.xbz.w(<8 x i32> _1:%.*) +-// CHECK-NEXT: ret i32 TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x i32>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call i32 @llvm.loongarch.lasx.xbz.w(<8 x i32> _1) ++// CHECK-NEXT: ret i32 TMP1 + // + int xbz_w(v8u32 _1) { return __builtin_lasx_xbz_w(_1); } + // CHECK-LABEL: @xvfcmp_caf_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.caf.d(<4 x double> _1:%.*, <4 x double> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x double>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.caf.d(<4 x double> _1, <4 x double> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_caf_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_caf_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_caf_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.caf.s(<8 x float> _1:%.*, <8 x float> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x float>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.caf.s(<8 x float> _1, <8 x float> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_caf_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_caf_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_ceq_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.ceq.d(<4 x double> _1:%.*, <4 x double> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x double>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> 
@llvm.loongarch.lasx.xvfcmp.ceq.d(<4 x double> _1, <4 x double> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_ceq_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_ceq_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_ceq_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.ceq.s(<8 x float> _1:%.*, <8 x float> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x float>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.ceq.s(<8 x float> _1, <8 x float> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_ceq_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_ceq_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_cle_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cle.d(<4 x double> _1:%.*, <4 x double> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x double>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cle.d(<4 x double> _1, <4 x double> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_cle_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cle_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_cle_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cle.s(<8 x float> _1:%.*, <8 x float> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x float>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cle.s(<8 x float> _1, <8 x float> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_cle_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cle_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_clt_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.clt.d(<4 x double> _1:%.*, <4 x double> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x double>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.clt.d(<4 x double> _1, <4 x double> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_clt_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_clt_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_clt_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.clt.s(<8 x float> _1:%.*, <8 x float> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x float>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.clt.s(<8 x float> _1, <8 x float> _2) ++// CHECK-NEXT: store 
<8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_clt_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_clt_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_cne_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cne.d(<4 x double> _1:%.*, <4 x double> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x double>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cne.d(<4 x double> _1, <4 x double> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_cne_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cne_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_cne_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cne.s(<8 x float> _1:%.*, <8 x float> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x float>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cne.s(<8 x float> _1, <8 x float> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_cne_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cne_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_cor_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cor.d(<4 x double> _1:%.*, <4 x double> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x double>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cor.d(<4 x double> _1, <4 x double> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_cor_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cor_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_cor_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cor.s(<8 x float> _1:%.*, <8 x float> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x float>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cor.s(<8 x float> _1, <8 x float> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_cor_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cor_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_cueq_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cueq.d(<4 x double> _1:%.*, <4 x double> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x double>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cueq.d(<4 x double> _1, <4 x double> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void 
+ // + v4i64 xvfcmp_cueq_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cueq_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_cueq_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cueq.s(<8 x float> _1:%.*, <8 x float> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x float>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cueq.s(<8 x float> _1, <8 x float> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_cueq_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cueq_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_cule_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cule.d(<4 x double> _1:%.*, <4 x double> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x double>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cule.d(<4 x double> _1, <4 x double> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_cule_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cule_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_cule_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cule.s(<8 x float> _1:%.*, <8 x float> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x float>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cule.s(<8 x float> _1, <8 x float> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_cule_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cule_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_cult_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cult.d(<4 x double> _1:%.*, <4 x double> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x double>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cult.d(<4 x double> _1, <4 x double> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_cult_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cult_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_cult_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cult.s(<8 x float> _1:%.*, <8 x float> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x float>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cult.s(<8 x float> _1, <8 x float> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_cult_s(v8f32 _1, v8f32 _2) { return 
__builtin_lasx_xvfcmp_cult_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_cun_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cun.d(<4 x double> _1:%.*, <4 x double> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x double>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cun.d(<4 x double> _1, <4 x double> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_cun_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cun_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_cune_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cune.d(<4 x double> _1:%.*, <4 x double> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x double>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.cune.d(<4 x double> _1, <4 x double> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_cune_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_cune_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_cune_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cune.s(<8 x float> _1:%.*, <8 x float> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x float>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cune.s(<8 x float> _1, <8 x float> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_cune_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cune_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_cun_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cun.s(<8 x float> _1:%.*, <8 x float> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x float>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.cun.s(<8 x float> _1, <8 x float> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_cun_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_cun_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_saf_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.saf.d(<4 x double> _1:%.*, <4 x double> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x double>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.saf.d(<4 x double> _1, <4 x double> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_saf_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_saf_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_saf_s( + 
// CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.saf.s(<8 x float> _1:%.*, <8 x float> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x float>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.saf.s(<8 x float> _1, <8 x float> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_saf_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_saf_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_seq_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.seq.d(<4 x double> _1:%.*, <4 x double> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x double>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.seq.d(<4 x double> _1, <4 x double> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_seq_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_seq_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_seq_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.seq.s(<8 x float> _1:%.*, <8 x float> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x float>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.seq.s(<8 x float> _1, <8 x float> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_seq_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_seq_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_sle_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sle.d(<4 x double> _1:%.*, <4 x double> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x double>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sle.d(<4 x double> _1, <4 x double> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_sle_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sle_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_sle_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sle.s(<8 x float> _1:%.*, <8 x float> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x float>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sle.s(<8 x float> _1, <8 x float> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_sle_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sle_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_slt_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> 
@llvm.loongarch.lasx.xvfcmp.slt.d(<4 x double> _1:%.*, <4 x double> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x double>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.slt.d(<4 x double> _1, <4 x double> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_slt_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_slt_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_slt_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.slt.s(<8 x float> _1:%.*, <8 x float> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x float>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.slt.s(<8 x float> _1, <8 x float> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_slt_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_slt_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_sne_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sne.d(<4 x double> _1:%.*, <4 x double> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x double>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sne.d(<4 x double> _1, <4 x double> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_sne_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sne_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_sne_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sne.s(<8 x float> _1:%.*, <8 x float> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x float>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sne.s(<8 x float> _1, <8 x float> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_sne_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sne_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_sor_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sor.d(<4 x double> _1:%.*, <4 x double> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x double>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sor.d(<4 x double> _1, <4 x double> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_sor_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sor_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_sor_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sor.s(<8 x float> _1:%.*, <8 x float> _2:%.*) +-// 
CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x float>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sor.s(<8 x float> _1, <8 x float> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_sor_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sor_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_sueq_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sueq.d(<4 x double> _1:%.*, <4 x double> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x double>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sueq.d(<4 x double> _1, <4 x double> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_sueq_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sueq_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_sueq_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sueq.s(<8 x float> _1:%.*, <8 x float> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x float>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sueq.s(<8 x float> _1, <8 x float> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_sueq_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sueq_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_sule_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sule.d(<4 x double> _1:%.*, <4 x double> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x double>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sule.d(<4 x double> _1, <4 x double> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_sule_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sule_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_sule_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sule.s(<8 x float> _1:%.*, <8 x float> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x float>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sule.s(<8 x float> _1, <8 x float> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_sule_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sule_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_sult_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sult.d(<4 x double> _1:%.*, <4 x double> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x 
double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x double>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sult.d(<4 x double> _1, <4 x double> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_sult_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sult_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_sult_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sult.s(<8 x float> _1:%.*, <8 x float> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x float>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sult.s(<8 x float> _1, <8 x float> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_sult_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sult_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_sun_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sun.d(<4 x double> _1:%.*, <4 x double> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x double>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sun.d(<4 x double> _1, <4 x double> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_sun_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sun_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_sune_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sune.d(<4 x double> _1:%.*, <4 x double> _2:%.*) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <4 x double>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvfcmp.sune.d(<4 x double> _1, <4 x double> _2) ++// CHECK-NEXT: store <4 x i64> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvfcmp_sune_d(v4f64 _1, v4f64 _2) { return __builtin_lasx_xvfcmp_sune_d(_1, _2); } + // CHECK-LABEL: @xvfcmp_sune_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sune.s(<8 x float> _1:%.*, <8 x float> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = load <8 x float>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sune.s(<8 x float> _1, <8 x float> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_sune_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sune_s(_1, _2); } + // CHECK-LABEL: @xvfcmp_sun_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sun.s(<8 x float> _1:%.*, <8 x float> _2:%.*) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: _2:%.* = 
load <8 x float>, ptr TMP1:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvfcmp.sun.s(<8 x float> _1, <8 x float> _2) ++// CHECK-NEXT: store <8 x i32> TMP2, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvfcmp_sun_s(v8f32 _1, v8f32 _2) { return __builtin_lasx_xvfcmp_sun_s(_1, _2); } + // CHECK-LABEL: @xvpickve_d_f( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvpickve.d.f(<4 x double> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x double> TMP0 ++// CHECK-NEXT: _1:%.* = load <4 x double>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <4 x double> @llvm.loongarch.lasx.xvpickve.d.f(<4 x double> _1, i32 1) ++// CHECK-NEXT: store <4 x double> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4f64 xvpickve_d_f(v4f64 _1) { return __builtin_lasx_xvpickve_d_f(_1, 1); } + // CHECK-LABEL: @xvpickve_w_f( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvpickve.w.f(<8 x float> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x float> TMP0 ++// CHECK-NEXT: _1:%.* = load <8 x float>, ptr TMP0:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: TMP1:%.* = tail call <8 x float> @llvm.loongarch.lasx.xvpickve.w.f(<8 x float> _1, i32 1) ++// CHECK-NEXT: store <8 x float> TMP1, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8f32 xvpickve_w_f(v8f32 _1) { return __builtin_lasx_xvpickve_w_f(_1, 1); } + // CHECK-LABEL: @xvrepli_b( + // CHECK-NEXT: entry: + // CHECK-NEXT: TMP0:%.* = tail call <32 x i8> @llvm.loongarch.lasx.xvrepli.b(i32 1) +-// CHECK-NEXT: ret <32 x i8> TMP0 ++// CHECK-NEXT: store <32 x i8> TMP0, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v32i8 xvrepli_b() { return __builtin_lasx_xvrepli_b(1); } + // CHECK-LABEL: @xvrepli_d( + // CHECK-NEXT: entry: + // CHECK-NEXT: TMP0:%.* = tail call <4 x i64> @llvm.loongarch.lasx.xvrepli.d(i32 1) +-// CHECK-NEXT: ret <4 x i64> TMP0 ++// CHECK-NEXT: store <4 x i64> TMP0, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v4i64 xvrepli_d() { return __builtin_lasx_xvrepli_d(1); } + // CHECK-LABEL: @xvrepli_h( + // CHECK-NEXT: entry: + // CHECK-NEXT: TMP0:%.* = tail call <16 x i16> @llvm.loongarch.lasx.xvrepli.h(i32 1) +-// CHECK-NEXT: ret <16 x i16> TMP0 ++// CHECK-NEXT: store <16 x i16> TMP0, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v16i16 xvrepli_h() { return __builtin_lasx_xvrepli_h(1); } + // CHECK-LABEL: @xvrepli_w( + // CHECK-NEXT: entry: + // CHECK-NEXT: TMP0:%.* = tail call <8 x i32> @llvm.loongarch.lasx.xvrepli.w(i32 1) +-// CHECK-NEXT: ret <8 x i32> TMP0 ++// CHECK-NEXT: store <8 x i32> TMP0, ptr AGG_RESULT:%.*, align 32, !tbaa TBAA2 ++// CHECK-NEXT: ret void + // + v8i32 xvrepli_w() { return __builtin_lasx_xvrepli_w(1); } +diff --git a/clang/test/CodeGen/LoongArch/lsx/builtin-alias.c b/clang/test/CodeGen/LoongArch/lsx/builtin-alias.c +index 331e29fb7d17..7a84e0ae24f9 100644 +--- a/clang/test/CodeGen/LoongArch/lsx/builtin-alias.c ++++ b/clang/test/CodeGen/LoongArch/lsx/builtin-alias.c +@@ -5,4080 +5,5838 @@ + + // CHECK-LABEL: @vsll_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsll.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* 
to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsll.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vsll_b(v16i8 _1, v16i8 _2) { return __lsx_vsll_b(_1, _2); } + // CHECK-LABEL: @vsll_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsll.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsll.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vsll_h(v8i16 _1, v8i16 _2) { return __lsx_vsll_h(_1, _2); } + // CHECK-LABEL: @vsll_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsll.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsll.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vsll_w(v4i32 _1, v4i32 _2) { return __lsx_vsll_w(_1, _2); } + // CHECK-LABEL: @vsll_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsll.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsll.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vsll_d(v2i64 _1, v2i64 _2) { return __lsx_vsll_d(_1, _2); } + // CHECK-LABEL: @vslli_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vslli.b(<16 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vslli.b(<16 x i8> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <16 x i8> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v16i8 vslli_b(v16i8 _1) { return __lsx_vslli_b(_1, 1); } + // CHECK-LABEL: @vslli_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vslli.h(<8 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vslli.h(<8 x i16> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <8 x i16> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v8i16 vslli_h(v8i16 _1) { return __lsx_vslli_h(_1, 1); } + // CHECK-LABEL: @vslli_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vslli.w(<4 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vslli.w(<4 x i32> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x i32> TMP1 to i128 ++// CHECK-NEXT: ret 
i128 TMP2 + // + v4i32 vslli_w(v4i32 _1) { return __lsx_vslli_w(_1, 1); } + // CHECK-LABEL: @vslli_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vslli.d(<2 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vslli.d(<2 x i64> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vslli_d(v2i64 _1) { return __lsx_vslli_d(_1, 1); } + // CHECK-LABEL: @vsra_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsra.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsra.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vsra_b(v16i8 _1, v16i8 _2) { return __lsx_vsra_b(_1, _2); } + // CHECK-LABEL: @vsra_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsra.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsra.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vsra_h(v8i16 _1, v8i16 _2) { return __lsx_vsra_h(_1, _2); } + // CHECK-LABEL: @vsra_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsra.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsra.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vsra_w(v4i32 _1, v4i32 _2) { return __lsx_vsra_w(_1, _2); } + // CHECK-LABEL: @vsra_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsra.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsra.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vsra_d(v2i64 _1, v2i64 _2) { return __lsx_vsra_d(_1, _2); } + // CHECK-LABEL: @vsrai_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsrai.b(<16 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsrai.b(<16 x i8> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <16 x i8> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v16i8 vsrai_b(v16i8 _1) { return __lsx_vsrai_b(_1, 1); } + // CHECK-LABEL: @vsrai_h( + // CHECK-NEXT: 
entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsrai.h(<8 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsrai.h(<8 x i16> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <8 x i16> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v8i16 vsrai_h(v8i16 _1) { return __lsx_vsrai_h(_1, 1); } + // CHECK-LABEL: @vsrai_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsrai.w(<4 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsrai.w(<4 x i32> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x i32> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4i32 vsrai_w(v4i32 _1) { return __lsx_vsrai_w(_1, 1); } + // CHECK-LABEL: @vsrai_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsrai.d(<2 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsrai.d(<2 x i64> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vsrai_d(v2i64 _1) { return __lsx_vsrai_d(_1, 1); } + // CHECK-LABEL: @vsrar_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsrar.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsrar.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vsrar_b(v16i8 _1, v16i8 _2) { return __lsx_vsrar_b(_1, _2); } + // CHECK-LABEL: @vsrar_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsrar.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsrar.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vsrar_h(v8i16 _1, v8i16 _2) { return __lsx_vsrar_h(_1, _2); } + // CHECK-LABEL: @vsrar_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsrar.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsrar.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vsrar_w(v4i32 _1, v4i32 _2) { return __lsx_vsrar_w(_1, _2); } + // CHECK-LABEL: @vsrar_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsrar.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 
_1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsrar.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vsrar_d(v2i64 _1, v2i64 _2) { return __lsx_vsrar_d(_1, _2); } + // CHECK-LABEL: @vsrari_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsrari.b(<16 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsrari.b(<16 x i8> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <16 x i8> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v16i8 vsrari_b(v16i8 _1) { return __lsx_vsrari_b(_1, 1); } + // CHECK-LABEL: @vsrari_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsrari.h(<8 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsrari.h(<8 x i16> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <8 x i16> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v8i16 vsrari_h(v8i16 _1) { return __lsx_vsrari_h(_1, 1); } + // CHECK-LABEL: @vsrari_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsrari.w(<4 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsrari.w(<4 x i32> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x i32> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4i32 vsrari_w(v4i32 _1) { return __lsx_vsrari_w(_1, 1); } + // CHECK-LABEL: @vsrari_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsrari.d(<2 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsrari.d(<2 x i64> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vsrari_d(v2i64 _1) { return __lsx_vsrari_d(_1, 1); } + // CHECK-LABEL: @vsrl_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsrl.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsrl.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vsrl_b(v16i8 _1, v16i8 _2) { return __lsx_vsrl_b(_1, _2); } + // CHECK-LABEL: @vsrl_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsrl.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsrl.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret 
i128 TMP3 + // + v8i16 vsrl_h(v8i16 _1, v8i16 _2) { return __lsx_vsrl_h(_1, _2); } + // CHECK-LABEL: @vsrl_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsrl.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsrl.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vsrl_w(v4i32 _1, v4i32 _2) { return __lsx_vsrl_w(_1, _2); } + // CHECK-LABEL: @vsrl_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsrl.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsrl.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vsrl_d(v2i64 _1, v2i64 _2) { return __lsx_vsrl_d(_1, _2); } + // CHECK-LABEL: @vsrli_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsrli.b(<16 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsrli.b(<16 x i8> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <16 x i8> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v16i8 vsrli_b(v16i8 _1) { return __lsx_vsrli_b(_1, 1); } + // CHECK-LABEL: @vsrli_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsrli.h(<8 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsrli.h(<8 x i16> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <8 x i16> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v8i16 vsrli_h(v8i16 _1) { return __lsx_vsrli_h(_1, 1); } + // CHECK-LABEL: @vsrli_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsrli.w(<4 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsrli.w(<4 x i32> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x i32> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4i32 vsrli_w(v4i32 _1) { return __lsx_vsrli_w(_1, 1); } + // CHECK-LABEL: @vsrli_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsrli.d(<2 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsrli.d(<2 x i64> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vsrli_d(v2i64 _1) { return __lsx_vsrli_d(_1, 1); } + // CHECK-LABEL: @vsrlr_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsrlr.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: 
TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsrlr.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vsrlr_b(v16i8 _1, v16i8 _2) { return __lsx_vsrlr_b(_1, _2); } + // CHECK-LABEL: @vsrlr_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsrlr.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsrlr.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vsrlr_h(v8i16 _1, v8i16 _2) { return __lsx_vsrlr_h(_1, _2); } + // CHECK-LABEL: @vsrlr_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsrlr.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsrlr.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vsrlr_w(v4i32 _1, v4i32 _2) { return __lsx_vsrlr_w(_1, _2); } + // CHECK-LABEL: @vsrlr_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsrlr.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsrlr.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vsrlr_d(v2i64 _1, v2i64 _2) { return __lsx_vsrlr_d(_1, _2); } + // CHECK-LABEL: @vsrlri_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsrlri.b(<16 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsrlri.b(<16 x i8> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <16 x i8> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v16i8 vsrlri_b(v16i8 _1) { return __lsx_vsrlri_b(_1, 1); } + // CHECK-LABEL: @vsrlri_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsrlri.h(<8 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsrlri.h(<8 x i16> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <8 x i16> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v8i16 vsrlri_h(v8i16 _1) { return __lsx_vsrlri_h(_1, 1); } + // CHECK-LABEL: @vsrlri_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsrlri.w(<4 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x 
i32> @llvm.loongarch.lsx.vsrlri.w(<4 x i32> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x i32> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4i32 vsrlri_w(v4i32 _1) { return __lsx_vsrlri_w(_1, 1); } + // CHECK-LABEL: @vsrlri_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsrlri.d(<2 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsrlri.d(<2 x i64> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vsrlri_d(v2i64 _1) { return __lsx_vsrlri_d(_1, 1); } + // CHECK-LABEL: @vbitclr_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vbitclr.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vbitclr.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16u8 vbitclr_b(v16u8 _1, v16u8 _2) { return __lsx_vbitclr_b(_1, _2); } + // CHECK-LABEL: @vbitclr_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vbitclr.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vbitclr.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8u16 vbitclr_h(v8u16 _1, v8u16 _2) { return __lsx_vbitclr_h(_1, _2); } + // CHECK-LABEL: @vbitclr_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vbitclr.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vbitclr.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4u32 vbitclr_w(v4u32 _1, v4u32 _2) { return __lsx_vbitclr_w(_1, _2); } + // CHECK-LABEL: @vbitclr_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vbitclr.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vbitclr.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2u64 vbitclr_d(v2u64 _1, v2u64 _2) { return __lsx_vbitclr_d(_1, _2); } + // CHECK-LABEL: @vbitclri_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vbitclri.b(<16 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vbitclri.b(<16 x 
i8> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <16 x i8> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v16u8 vbitclri_b(v16u8 _1) { return __lsx_vbitclri_b(_1, 1); } + // CHECK-LABEL: @vbitclri_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vbitclri.h(<8 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vbitclri.h(<8 x i16> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <8 x i16> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v8u16 vbitclri_h(v8u16 _1) { return __lsx_vbitclri_h(_1, 1); } + // CHECK-LABEL: @vbitclri_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vbitclri.w(<4 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vbitclri.w(<4 x i32> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x i32> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4u32 vbitclri_w(v4u32 _1) { return __lsx_vbitclri_w(_1, 1); } + // CHECK-LABEL: @vbitclri_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vbitclri.d(<2 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vbitclri.d(<2 x i64> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2u64 vbitclri_d(v2u64 _1) { return __lsx_vbitclri_d(_1, 1); } + // CHECK-LABEL: @vbitset_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vbitset.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vbitset.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16u8 vbitset_b(v16u8 _1, v16u8 _2) { return __lsx_vbitset_b(_1, _2); } + // CHECK-LABEL: @vbitset_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vbitset.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vbitset.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8u16 vbitset_h(v8u16 _1, v8u16 _2) { return __lsx_vbitset_h(_1, _2); } + // CHECK-LABEL: @vbitset_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vbitset.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vbitset.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4u32 
vbitset_w(v4u32 _1, v4u32 _2) { return __lsx_vbitset_w(_1, _2); } + // CHECK-LABEL: @vbitset_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vbitset.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vbitset.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2u64 vbitset_d(v2u64 _1, v2u64 _2) { return __lsx_vbitset_d(_1, _2); } + // CHECK-LABEL: @vbitseti_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vbitseti.b(<16 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vbitseti.b(<16 x i8> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <16 x i8> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v16u8 vbitseti_b(v16u8 _1) { return __lsx_vbitseti_b(_1, 1); } + // CHECK-LABEL: @vbitseti_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vbitseti.h(<8 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vbitseti.h(<8 x i16> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <8 x i16> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v8u16 vbitseti_h(v8u16 _1) { return __lsx_vbitseti_h(_1, 1); } + // CHECK-LABEL: @vbitseti_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vbitseti.w(<4 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vbitseti.w(<4 x i32> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x i32> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4u32 vbitseti_w(v4u32 _1) { return __lsx_vbitseti_w(_1, 1); } + // CHECK-LABEL: @vbitseti_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vbitseti.d(<2 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vbitseti.d(<2 x i64> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2u64 vbitseti_d(v2u64 _1) { return __lsx_vbitseti_d(_1, 1); } + // CHECK-LABEL: @vbitrev_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vbitrev.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vbitrev.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16u8 vbitrev_b(v16u8 _1, v16u8 _2) { return __lsx_vbitrev_b(_1, _2); } + // CHECK-LABEL: @vbitrev_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vbitrev.h(<8 x i16> _1:%.*, 
<8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vbitrev.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8u16 vbitrev_h(v8u16 _1, v8u16 _2) { return __lsx_vbitrev_h(_1, _2); } + // CHECK-LABEL: @vbitrev_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vbitrev.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vbitrev.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4u32 vbitrev_w(v4u32 _1, v4u32 _2) { return __lsx_vbitrev_w(_1, _2); } + // CHECK-LABEL: @vbitrev_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vbitrev.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vbitrev.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2u64 vbitrev_d(v2u64 _1, v2u64 _2) { return __lsx_vbitrev_d(_1, _2); } + // CHECK-LABEL: @vbitrevi_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vbitrevi.b(<16 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vbitrevi.b(<16 x i8> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <16 x i8> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v16u8 vbitrevi_b(v16u8 _1) { return __lsx_vbitrevi_b(_1, 1); } + // CHECK-LABEL: @vbitrevi_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vbitrevi.h(<8 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vbitrevi.h(<8 x i16> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <8 x i16> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v8u16 vbitrevi_h(v8u16 _1) { return __lsx_vbitrevi_h(_1, 1); } + // CHECK-LABEL: @vbitrevi_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vbitrevi.w(<4 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vbitrevi.w(<4 x i32> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x i32> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4u32 vbitrevi_w(v4u32 _1) { return __lsx_vbitrevi_w(_1, 1); } + // CHECK-LABEL: @vbitrevi_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vbitrevi.d(<2 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// 
CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vbitrevi.d(<2 x i64> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2u64 vbitrevi_d(v2u64 _1) { return __lsx_vbitrevi_d(_1, 1); } + // CHECK-LABEL: @vadd_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vadd.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vadd.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vadd_b(v16i8 _1, v16i8 _2) { return __lsx_vadd_b(_1, _2); } + // CHECK-LABEL: @vadd_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vadd.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vadd.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vadd_h(v8i16 _1, v8i16 _2) { return __lsx_vadd_h(_1, _2); } + // CHECK-LABEL: @vadd_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vadd.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vadd.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vadd_w(v4i32 _1, v4i32 _2) { return __lsx_vadd_w(_1, _2); } + // CHECK-LABEL: @vadd_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vadd.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vadd.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vadd_d(v2i64 _1, v2i64 _2) { return __lsx_vadd_d(_1, _2); } + // CHECK-LABEL: @vaddi_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vaddi.bu(<16 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vaddi.bu(<16 x i8> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <16 x i8> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v16i8 vaddi_bu(v16i8 _1) { return __lsx_vaddi_bu(_1, 1); } + // CHECK-LABEL: @vaddi_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vaddi.hu(<8 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vaddi.hu(<8 x i16> TMP0, i32 1) ++// 
CHECK-NEXT: TMP2:%.* = bitcast <8 x i16> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v8i16 vaddi_hu(v8i16 _1) { return __lsx_vaddi_hu(_1, 1); } + // CHECK-LABEL: @vaddi_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vaddi.wu(<4 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vaddi.wu(<4 x i32> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x i32> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4i32 vaddi_wu(v4i32 _1) { return __lsx_vaddi_wu(_1, 1); } + // CHECK-LABEL: @vaddi_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vaddi.du(<2 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vaddi.du(<2 x i64> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vaddi_du(v2i64 _1) { return __lsx_vaddi_du(_1, 1); } + // CHECK-LABEL: @vsub_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsub.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsub.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vsub_b(v16i8 _1, v16i8 _2) { return __lsx_vsub_b(_1, _2); } + // CHECK-LABEL: @vsub_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsub.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsub.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vsub_h(v8i16 _1, v8i16 _2) { return __lsx_vsub_h(_1, _2); } + // CHECK-LABEL: @vsub_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsub.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsub.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vsub_w(v4i32 _1, v4i32 _2) { return __lsx_vsub_w(_1, _2); } + // CHECK-LABEL: @vsub_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsub.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsub.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vsub_d(v2i64 _1, 
v2i64 _2) { return __lsx_vsub_d(_1, _2); } + // CHECK-LABEL: @vsubi_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsubi.bu(<16 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsubi.bu(<16 x i8> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <16 x i8> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v16i8 vsubi_bu(v16i8 _1) { return __lsx_vsubi_bu(_1, 1); } + // CHECK-LABEL: @vsubi_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsubi.hu(<8 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsubi.hu(<8 x i16> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <8 x i16> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v8i16 vsubi_hu(v8i16 _1) { return __lsx_vsubi_hu(_1, 1); } + // CHECK-LABEL: @vsubi_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsubi.wu(<4 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsubi.wu(<4 x i32> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x i32> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4i32 vsubi_wu(v4i32 _1) { return __lsx_vsubi_wu(_1, 1); } + // CHECK-LABEL: @vsubi_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsubi.du(<2 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsubi.du(<2 x i64> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vsubi_du(v2i64 _1) { return __lsx_vsubi_du(_1, 1); } + // CHECK-LABEL: @vmax_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmax.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmax.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vmax_b(v16i8 _1, v16i8 _2) { return __lsx_vmax_b(_1, _2); } + // CHECK-LABEL: @vmax_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmax.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmax.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vmax_h(v8i16 _1, v8i16 _2) { return __lsx_vmax_h(_1, _2); } + // CHECK-LABEL: @vmax_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmax.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 
_1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmax.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vmax_w(v4i32 _1, v4i32 _2) { return __lsx_vmax_w(_1, _2); } + // CHECK-LABEL: @vmax_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmax.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmax.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vmax_d(v2i64 _1, v2i64 _2) { return __lsx_vmax_d(_1, _2); } + // CHECK-LABEL: @vmaxi_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmaxi.b(<16 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmaxi.b(<16 x i8> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <16 x i8> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v16i8 vmaxi_b(v16i8 _1) { return __lsx_vmaxi_b(_1, 1); } + // CHECK-LABEL: @vmaxi_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmaxi.h(<8 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmaxi.h(<8 x i16> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <8 x i16> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v8i16 vmaxi_h(v8i16 _1) { return __lsx_vmaxi_h(_1, 1); } + // CHECK-LABEL: @vmaxi_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmaxi.w(<4 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmaxi.w(<4 x i32> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x i32> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4i32 vmaxi_w(v4i32 _1) { return __lsx_vmaxi_w(_1, 1); } + // CHECK-LABEL: @vmaxi_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmaxi.d(<2 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmaxi.d(<2 x i64> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vmaxi_d(v2i64 _1) { return __lsx_vmaxi_d(_1, 1); } + // CHECK-LABEL: @vmax_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmax.bu(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmax.bu(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + 
v16u8 vmax_bu(v16u8 _1, v16u8 _2) { return __lsx_vmax_bu(_1, _2); } + // CHECK-LABEL: @vmax_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmax.hu(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmax.hu(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8u16 vmax_hu(v8u16 _1, v8u16 _2) { return __lsx_vmax_hu(_1, _2); } + // CHECK-LABEL: @vmax_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmax.wu(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmax.wu(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4u32 vmax_wu(v4u32 _1, v4u32 _2) { return __lsx_vmax_wu(_1, _2); } + // CHECK-LABEL: @vmax_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmax.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmax.du(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2u64 vmax_du(v2u64 _1, v2u64 _2) { return __lsx_vmax_du(_1, _2); } + // CHECK-LABEL: @vmaxi_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmaxi.bu(<16 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmaxi.bu(<16 x i8> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <16 x i8> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v16u8 vmaxi_bu(v16u8 _1) { return __lsx_vmaxi_bu(_1, 1); } + // CHECK-LABEL: @vmaxi_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmaxi.hu(<8 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmaxi.hu(<8 x i16> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <8 x i16> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v8u16 vmaxi_hu(v8u16 _1) { return __lsx_vmaxi_hu(_1, 1); } + // CHECK-LABEL: @vmaxi_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmaxi.wu(<4 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmaxi.wu(<4 x i32> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x i32> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4u32 vmaxi_wu(v4u32 _1) { return __lsx_vmaxi_wu(_1, 1); } + // CHECK-LABEL: @vmaxi_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> 
@llvm.loongarch.lsx.vmaxi.du(<2 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmaxi.du(<2 x i64> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2u64 vmaxi_du(v2u64 _1) { return __lsx_vmaxi_du(_1, 1); } + // CHECK-LABEL: @vmin_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmin.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmin.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vmin_b(v16i8 _1, v16i8 _2) { return __lsx_vmin_b(_1, _2); } + // CHECK-LABEL: @vmin_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmin.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmin.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vmin_h(v8i16 _1, v8i16 _2) { return __lsx_vmin_h(_1, _2); } + // CHECK-LABEL: @vmin_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmin.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmin.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vmin_w(v4i32 _1, v4i32 _2) { return __lsx_vmin_w(_1, _2); } + // CHECK-LABEL: @vmin_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmin.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmin.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vmin_d(v2i64 _1, v2i64 _2) { return __lsx_vmin_d(_1, _2); } + // CHECK-LABEL: @vmini_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmini.b(<16 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmini.b(<16 x i8> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <16 x i8> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v16i8 vmini_b(v16i8 _1) { return __lsx_vmini_b(_1, 1); } + // CHECK-LABEL: @vmini_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmini.h(<8 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* 
= bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmini.h(<8 x i16> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <8 x i16> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v8i16 vmini_h(v8i16 _1) { return __lsx_vmini_h(_1, 1); } + // CHECK-LABEL: @vmini_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmini.w(<4 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmini.w(<4 x i32> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x i32> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4i32 vmini_w(v4i32 _1) { return __lsx_vmini_w(_1, 1); } + // CHECK-LABEL: @vmini_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmini.d(<2 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmini.d(<2 x i64> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vmini_d(v2i64 _1) { return __lsx_vmini_d(_1, 1); } + // CHECK-LABEL: @vmin_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmin.bu(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmin.bu(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16u8 vmin_bu(v16u8 _1, v16u8 _2) { return __lsx_vmin_bu(_1, _2); } + // CHECK-LABEL: @vmin_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmin.hu(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmin.hu(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8u16 vmin_hu(v8u16 _1, v8u16 _2) { return __lsx_vmin_hu(_1, _2); } + // CHECK-LABEL: @vmin_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmin.wu(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmin.wu(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4u32 vmin_wu(v4u32 _1, v4u32 _2) { return __lsx_vmin_wu(_1, _2); } + // CHECK-LABEL: @vmin_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmin.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> 
@llvm.loongarch.lsx.vmin.du(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2u64 vmin_du(v2u64 _1, v2u64 _2) { return __lsx_vmin_du(_1, _2); } + // CHECK-LABEL: @vmini_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmini.bu(<16 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmini.bu(<16 x i8> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <16 x i8> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v16u8 vmini_bu(v16u8 _1) { return __lsx_vmini_bu(_1, 1); } + // CHECK-LABEL: @vmini_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmini.hu(<8 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmini.hu(<8 x i16> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <8 x i16> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v8u16 vmini_hu(v8u16 _1) { return __lsx_vmini_hu(_1, 1); } + // CHECK-LABEL: @vmini_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmini.wu(<4 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmini.wu(<4 x i32> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x i32> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4u32 vmini_wu(v4u32 _1) { return __lsx_vmini_wu(_1, 1); } + // CHECK-LABEL: @vmini_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmini.du(<2 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmini.du(<2 x i64> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2u64 vmini_du(v2u64 _1) { return __lsx_vmini_du(_1, 1); } + // CHECK-LABEL: @vseq_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vseq.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vseq.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vseq_b(v16i8 _1, v16i8 _2) { return __lsx_vseq_b(_1, _2); } + // CHECK-LABEL: @vseq_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vseq.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vseq.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vseq_h(v8i16 _1, v8i16 _2) { return __lsx_vseq_h(_1, _2); } + // CHECK-LABEL: @vseq_w( + // CHECK-NEXT: entry: +-// 
CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vseq.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vseq.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vseq_w(v4i32 _1, v4i32 _2) { return __lsx_vseq_w(_1, _2); } + // CHECK-LABEL: @vseq_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vseq.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vseq.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vseq_d(v2i64 _1, v2i64 _2) { return __lsx_vseq_d(_1, _2); } + // CHECK-LABEL: @vseqi_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vseqi.b(<16 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vseqi.b(<16 x i8> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <16 x i8> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v16i8 vseqi_b(v16i8 _1) { return __lsx_vseqi_b(_1, 1); } + // CHECK-LABEL: @vseqi_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vseqi.h(<8 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vseqi.h(<8 x i16> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <8 x i16> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v8i16 vseqi_h(v8i16 _1) { return __lsx_vseqi_h(_1, 1); } + // CHECK-LABEL: @vseqi_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vseqi.w(<4 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vseqi.w(<4 x i32> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x i32> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4i32 vseqi_w(v4i32 _1) { return __lsx_vseqi_w(_1, 1); } + // CHECK-LABEL: @vseqi_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vseqi.d(<2 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vseqi.d(<2 x i64> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vseqi_d(v2i64 _1) { return __lsx_vseqi_d(_1, 1); } + // CHECK-LABEL: @vslti_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vslti.b(<16 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vslti.b(<16 x i8> TMP0, i32 1) ++// 
CHECK-NEXT: TMP2:%.* = bitcast <16 x i8> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v16i8 vslti_b(v16i8 _1) { return __lsx_vslti_b(_1, 1); } + // CHECK-LABEL: @vslt_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vslt.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vslt.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vslt_b(v16i8 _1, v16i8 _2) { return __lsx_vslt_b(_1, _2); } + // CHECK-LABEL: @vslt_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vslt.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vslt.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vslt_h(v8i16 _1, v8i16 _2) { return __lsx_vslt_h(_1, _2); } + // CHECK-LABEL: @vslt_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vslt.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vslt.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vslt_w(v4i32 _1, v4i32 _2) { return __lsx_vslt_w(_1, _2); } + // CHECK-LABEL: @vslt_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vslt.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vslt.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vslt_d(v2i64 _1, v2i64 _2) { return __lsx_vslt_d(_1, _2); } + // CHECK-LABEL: @vslti_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vslti.h(<8 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vslti.h(<8 x i16> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <8 x i16> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v8i16 vslti_h(v8i16 _1) { return __lsx_vslti_h(_1, 1); } + // CHECK-LABEL: @vslti_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vslti.w(<4 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vslti.w(<4 x i32> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x i32> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4i32 vslti_w(v4i32 _1) { 
return __lsx_vslti_w(_1, 1); } + // CHECK-LABEL: @vslti_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vslti.d(<2 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vslti.d(<2 x i64> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vslti_d(v2i64 _1) { return __lsx_vslti_d(_1, 1); } + // CHECK-LABEL: @vslt_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vslt.bu(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vslt.bu(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vslt_bu(v16u8 _1, v16u8 _2) { return __lsx_vslt_bu(_1, _2); } + // CHECK-LABEL: @vslt_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vslt.hu(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vslt.hu(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vslt_hu(v8u16 _1, v8u16 _2) { return __lsx_vslt_hu(_1, _2); } + // CHECK-LABEL: @vslt_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vslt.wu(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vslt.wu(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vslt_wu(v4u32 _1, v4u32 _2) { return __lsx_vslt_wu(_1, _2); } + // CHECK-LABEL: @vslt_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vslt.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vslt.du(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vslt_du(v2u64 _1, v2u64 _2) { return __lsx_vslt_du(_1, _2); } + // CHECK-LABEL: @vslti_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vslti.bu(<16 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vslti.bu(<16 x i8> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <16 x i8> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v16i8 vslti_bu(v16u8 _1) { return __lsx_vslti_bu(_1, 1); } + // CHECK-LABEL: @vslti_hu( + // CHECK-NEXT: entry: +-// 
CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vslti.hu(<8 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vslti.hu(<8 x i16> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <8 x i16> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v8i16 vslti_hu(v8u16 _1) { return __lsx_vslti_hu(_1, 1); } + // CHECK-LABEL: @vslti_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vslti.wu(<4 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vslti.wu(<4 x i32> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x i32> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4i32 vslti_wu(v4u32 _1) { return __lsx_vslti_wu(_1, 1); } + // CHECK-LABEL: @vslti_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vslti.du(<2 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vslti.du(<2 x i64> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vslti_du(v2u64 _1) { return __lsx_vslti_du(_1, 1); } + // CHECK-LABEL: @vsle_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsle.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsle.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vsle_b(v16i8 _1, v16i8 _2) { return __lsx_vsle_b(_1, _2); } + // CHECK-LABEL: @vsle_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsle.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsle.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vsle_h(v8i16 _1, v8i16 _2) { return __lsx_vsle_h(_1, _2); } + // CHECK-LABEL: @vsle_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsle.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsle.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vsle_w(v4i32 _1, v4i32 _2) { return __lsx_vsle_w(_1, _2); } + // CHECK-LABEL: @vsle_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsle.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to 
<2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsle.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vsle_d(v2i64 _1, v2i64 _2) { return __lsx_vsle_d(_1, _2); } + // CHECK-LABEL: @vslei_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vslei.b(<16 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vslei.b(<16 x i8> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <16 x i8> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v16i8 vslei_b(v16i8 _1) { return __lsx_vslei_b(_1, 1); } + // CHECK-LABEL: @vslei_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vslei.h(<8 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vslei.h(<8 x i16> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <8 x i16> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v8i16 vslei_h(v8i16 _1) { return __lsx_vslei_h(_1, 1); } + // CHECK-LABEL: @vslei_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vslei.w(<4 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vslei.w(<4 x i32> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x i32> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4i32 vslei_w(v4i32 _1) { return __lsx_vslei_w(_1, 1); } + // CHECK-LABEL: @vslei_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vslei.d(<2 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vslei.d(<2 x i64> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vslei_d(v2i64 _1) { return __lsx_vslei_d(_1, 1); } + // CHECK-LABEL: @vsle_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsle.bu(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsle.bu(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vsle_bu(v16u8 _1, v16u8 _2) { return __lsx_vsle_bu(_1, _2); } + // CHECK-LABEL: @vsle_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsle.hu(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsle.hu(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 
vsle_hu(v8u16 _1, v8u16 _2) { return __lsx_vsle_hu(_1, _2); } + // CHECK-LABEL: @vsle_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsle.wu(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsle.wu(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vsle_wu(v4u32 _1, v4u32 _2) { return __lsx_vsle_wu(_1, _2); } + // CHECK-LABEL: @vsle_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsle.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsle.du(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vsle_du(v2u64 _1, v2u64 _2) { return __lsx_vsle_du(_1, _2); } + // CHECK-LABEL: @vslei_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vslei.bu(<16 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vslei.bu(<16 x i8> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <16 x i8> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v16i8 vslei_bu(v16u8 _1) { return __lsx_vslei_bu(_1, 1); } + // CHECK-LABEL: @vslei_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vslei.hu(<8 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vslei.hu(<8 x i16> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <8 x i16> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v8i16 vslei_hu(v8u16 _1) { return __lsx_vslei_hu(_1, 1); } + // CHECK-LABEL: @vslei_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vslei.wu(<4 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vslei.wu(<4 x i32> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x i32> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4i32 vslei_wu(v4u32 _1) { return __lsx_vslei_wu(_1, 1); } + // CHECK-LABEL: @vslei_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vslei.du(<2 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vslei.du(<2 x i64> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vslei_du(v2u64 _1) { return __lsx_vslei_du(_1, 1); } + // CHECK-LABEL: @vsat_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsat.b(<16 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = 
bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsat.b(<16 x i8> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <16 x i8> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v16i8 vsat_b(v16i8 _1) { return __lsx_vsat_b(_1, 1); } + // CHECK-LABEL: @vsat_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsat.h(<8 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsat.h(<8 x i16> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <8 x i16> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v8i16 vsat_h(v8i16 _1) { return __lsx_vsat_h(_1, 1); } + // CHECK-LABEL: @vsat_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsat.w(<4 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsat.w(<4 x i32> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x i32> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4i32 vsat_w(v4i32 _1) { return __lsx_vsat_w(_1, 1); } + // CHECK-LABEL: @vsat_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsat.d(<2 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsat.d(<2 x i64> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vsat_d(v2i64 _1) { return __lsx_vsat_d(_1, 1); } + // CHECK-LABEL: @vsat_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsat.bu(<16 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsat.bu(<16 x i8> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <16 x i8> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v16u8 vsat_bu(v16u8 _1) { return __lsx_vsat_bu(_1, 1); } + // CHECK-LABEL: @vsat_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsat.hu(<8 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsat.hu(<8 x i16> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <8 x i16> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v8u16 vsat_hu(v8u16 _1) { return __lsx_vsat_hu(_1, 1); } + // CHECK-LABEL: @vsat_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsat.wu(<4 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsat.wu(<4 x i32> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x i32> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4u32 vsat_wu(v4u32 _1) { return __lsx_vsat_wu(_1, 1); } + // CHECK-LABEL: @vsat_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsat.du(<2 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = 
bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsat.du(<2 x i64> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2u64 vsat_du(v2u64 _1) { return __lsx_vsat_du(_1, 1); } + // CHECK-LABEL: @vadda_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vadda.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vadda.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vadda_b(v16i8 _1, v16i8 _2) { return __lsx_vadda_b(_1, _2); } + // CHECK-LABEL: @vadda_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vadda.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vadda.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vadda_h(v8i16 _1, v8i16 _2) { return __lsx_vadda_h(_1, _2); } + // CHECK-LABEL: @vadda_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vadda.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vadda.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vadda_w(v4i32 _1, v4i32 _2) { return __lsx_vadda_w(_1, _2); } + // CHECK-LABEL: @vadda_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vadda.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vadda.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vadda_d(v2i64 _1, v2i64 _2) { return __lsx_vadda_d(_1, _2); } + // CHECK-LABEL: @vsadd_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsadd.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsadd.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vsadd_b(v16i8 _1, v16i8 _2) { return __lsx_vsadd_b(_1, _2); } + // CHECK-LABEL: @vsadd_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsadd.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: 
TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsadd.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vsadd_h(v8i16 _1, v8i16 _2) { return __lsx_vsadd_h(_1, _2); } + // CHECK-LABEL: @vsadd_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsadd.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsadd.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vsadd_w(v4i32 _1, v4i32 _2) { return __lsx_vsadd_w(_1, _2); } + // CHECK-LABEL: @vsadd_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsadd.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsadd.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vsadd_d(v2i64 _1, v2i64 _2) { return __lsx_vsadd_d(_1, _2); } + // CHECK-LABEL: @vsadd_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsadd.bu(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsadd.bu(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16u8 vsadd_bu(v16u8 _1, v16u8 _2) { return __lsx_vsadd_bu(_1, _2); } + // CHECK-LABEL: @vsadd_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsadd.hu(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsadd.hu(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8u16 vsadd_hu(v8u16 _1, v8u16 _2) { return __lsx_vsadd_hu(_1, _2); } + // CHECK-LABEL: @vsadd_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsadd.wu(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsadd.wu(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4u32 vsadd_wu(v4u32 _1, v4u32 _2) { return __lsx_vsadd_wu(_1, _2); } + // CHECK-LABEL: @vsadd_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> 
@llvm.loongarch.lsx.vsadd.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsadd.du(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2u64 vsadd_du(v2u64 _1, v2u64 _2) { return __lsx_vsadd_du(_1, _2); } + // CHECK-LABEL: @vavg_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vavg.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vavg.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vavg_b(v16i8 _1, v16i8 _2) { return __lsx_vavg_b(_1, _2); } + // CHECK-LABEL: @vavg_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vavg.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vavg.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vavg_h(v8i16 _1, v8i16 _2) { return __lsx_vavg_h(_1, _2); } + // CHECK-LABEL: @vavg_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vavg.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vavg.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vavg_w(v4i32 _1, v4i32 _2) { return __lsx_vavg_w(_1, _2); } + // CHECK-LABEL: @vavg_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vavg.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vavg.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vavg_d(v2i64 _1, v2i64 _2) { return __lsx_vavg_d(_1, _2); } + // CHECK-LABEL: @vavg_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vavg.bu(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vavg.bu(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16u8 vavg_bu(v16u8 _1, v16u8 _2) { return __lsx_vavg_bu(_1, _2); } + // 
CHECK-LABEL: @vavg_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vavg.hu(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vavg.hu(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8u16 vavg_hu(v8u16 _1, v8u16 _2) { return __lsx_vavg_hu(_1, _2); } + // CHECK-LABEL: @vavg_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vavg.wu(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vavg.wu(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4u32 vavg_wu(v4u32 _1, v4u32 _2) { return __lsx_vavg_wu(_1, _2); } + // CHECK-LABEL: @vavg_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vavg.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vavg.du(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2u64 vavg_du(v2u64 _1, v2u64 _2) { return __lsx_vavg_du(_1, _2); } + // CHECK-LABEL: @vavgr_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vavgr.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vavgr.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vavgr_b(v16i8 _1, v16i8 _2) { return __lsx_vavgr_b(_1, _2); } + // CHECK-LABEL: @vavgr_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vavgr.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vavgr.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vavgr_h(v8i16 _1, v8i16 _2) { return __lsx_vavgr_h(_1, _2); } + // CHECK-LABEL: @vavgr_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vavgr.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vavgr.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// 
CHECK-NEXT: ret i128 TMP3 + // + v4i32 vavgr_w(v4i32 _1, v4i32 _2) { return __lsx_vavgr_w(_1, _2); } + // CHECK-LABEL: @vavgr_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vavgr.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vavgr.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vavgr_d(v2i64 _1, v2i64 _2) { return __lsx_vavgr_d(_1, _2); } + // CHECK-LABEL: @vavgr_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vavgr.bu(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vavgr.bu(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16u8 vavgr_bu(v16u8 _1, v16u8 _2) { return __lsx_vavgr_bu(_1, _2); } + // CHECK-LABEL: @vavgr_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vavgr.hu(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vavgr.hu(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8u16 vavgr_hu(v8u16 _1, v8u16 _2) { return __lsx_vavgr_hu(_1, _2); } + // CHECK-LABEL: @vavgr_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vavgr.wu(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vavgr.wu(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4u32 vavgr_wu(v4u32 _1, v4u32 _2) { return __lsx_vavgr_wu(_1, _2); } + // CHECK-LABEL: @vavgr_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vavgr.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vavgr.du(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2u64 vavgr_du(v2u64 _1, v2u64 _2) { return __lsx_vavgr_du(_1, _2); } + // CHECK-LABEL: @vssub_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssub.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> 
@llvm.loongarch.lsx.vssub.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vssub_b(v16i8 _1, v16i8 _2) { return __lsx_vssub_b(_1, _2); } + // CHECK-LABEL: @vssub_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssub.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssub.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vssub_h(v8i16 _1, v8i16 _2) { return __lsx_vssub_h(_1, _2); } + // CHECK-LABEL: @vssub_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssub.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssub.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vssub_w(v4i32 _1, v4i32 _2) { return __lsx_vssub_w(_1, _2); } + // CHECK-LABEL: @vssub_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vssub.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vssub.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vssub_d(v2i64 _1, v2i64 _2) { return __lsx_vssub_d(_1, _2); } + // CHECK-LABEL: @vssub_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssub.bu(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssub.bu(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16u8 vssub_bu(v16u8 _1, v16u8 _2) { return __lsx_vssub_bu(_1, _2); } + // CHECK-LABEL: @vssub_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssub.hu(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssub.hu(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8u16 vssub_hu(v8u16 _1, v8u16 _2) { return __lsx_vssub_hu(_1, _2); } + // CHECK-LABEL: @vssub_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssub.wu(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// 
CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssub.wu(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4u32 vssub_wu(v4u32 _1, v4u32 _2) { return __lsx_vssub_wu(_1, _2); } + // CHECK-LABEL: @vssub_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vssub.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vssub.du(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2u64 vssub_du(v2u64 _1, v2u64 _2) { return __lsx_vssub_du(_1, _2); } + // CHECK-LABEL: @vabsd_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vabsd.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vabsd.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vabsd_b(v16i8 _1, v16i8 _2) { return __lsx_vabsd_b(_1, _2); } + // CHECK-LABEL: @vabsd_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vabsd.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vabsd.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vabsd_h(v8i16 _1, v8i16 _2) { return __lsx_vabsd_h(_1, _2); } + // CHECK-LABEL: @vabsd_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vabsd.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vabsd.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vabsd_w(v4i32 _1, v4i32 _2) { return __lsx_vabsd_w(_1, _2); } + // CHECK-LABEL: @vabsd_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vabsd.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vabsd.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vabsd_d(v2i64 _1, v2i64 _2) { return __lsx_vabsd_d(_1, _2); } + // CHECK-LABEL: @vabsd_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vabsd.bu(<16 x i8> _1:%.*, <16 x i8> _2:%.*) 
+-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vabsd.bu(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16u8 vabsd_bu(v16u8 _1, v16u8 _2) { return __lsx_vabsd_bu(_1, _2); } + // CHECK-LABEL: @vabsd_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vabsd.hu(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vabsd.hu(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8u16 vabsd_hu(v8u16 _1, v8u16 _2) { return __lsx_vabsd_hu(_1, _2); } + // CHECK-LABEL: @vabsd_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vabsd.wu(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vabsd.wu(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4u32 vabsd_wu(v4u32 _1, v4u32 _2) { return __lsx_vabsd_wu(_1, _2); } + // CHECK-LABEL: @vabsd_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vabsd.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vabsd.du(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2u64 vabsd_du(v2u64 _1, v2u64 _2) { return __lsx_vabsd_du(_1, _2); } + // CHECK-LABEL: @vmul_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmul.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmul.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vmul_b(v16i8 _1, v16i8 _2) { return __lsx_vmul_b(_1, _2); } + // CHECK-LABEL: @vmul_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmul.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmul.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vmul_h(v8i16 _1, v8i16 _2) { return __lsx_vmul_h(_1, _2); } + // CHECK-LABEL: @vmul_w( + // CHECK-NEXT: entry: +-// 
CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmul.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmul.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vmul_w(v4i32 _1, v4i32 _2) { return __lsx_vmul_w(_1, _2); } + // CHECK-LABEL: @vmul_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmul.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmul.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vmul_d(v2i64 _1, v2i64 _2) { return __lsx_vmul_d(_1, _2); } + // CHECK-LABEL: @vmadd_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmadd.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*, <16 x i8> _3:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP3:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmadd.b(<16 x i8> TMP0, <16 x i8> TMP1, <16 x i8> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <16 x i8> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v16i8 vmadd_b(v16i8 _1, v16i8 _2, v16i8 _3) { + return __lsx_vmadd_b(_1, _2, _3); + } + // CHECK-LABEL: @vmadd_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmadd.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*, <8 x i16> _3:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP3:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmadd.h(<8 x i16> TMP0, <8 x i16> TMP1, <8 x i16> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <8 x i16> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v8i16 vmadd_h(v8i16 _1, v8i16 _2, v8i16 _3) { + return __lsx_vmadd_h(_1, _2, _3); + } + // CHECK-LABEL: @vmadd_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmadd.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*, <4 x i32> _3:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP3:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmadd.w(<4 x i32> TMP0, <4 x i32> TMP1, <4 x i32> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <4 x i32> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v4i32 vmadd_w(v4i32 _1, v4i32 _2, v4i32 _3) { + return __lsx_vmadd_w(_1, _2, _3); + } + // CHECK-LABEL: @vmadd_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmadd.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*, <2 x i64> _3:%.*) +-// CHECK-NEXT: ret <2 x i64> 
TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP3:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmadd.d(<2 x i64> TMP0, <2 x i64> TMP1, <2 x i64> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <2 x i64> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v2i64 vmadd_d(v2i64 _1, v2i64 _2, v2i64 _3) { + return __lsx_vmadd_d(_1, _2, _3); + } + // CHECK-LABEL: @vmsub_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmsub.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*, <16 x i8> _3:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP3:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmsub.b(<16 x i8> TMP0, <16 x i8> TMP1, <16 x i8> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <16 x i8> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v16i8 vmsub_b(v16i8 _1, v16i8 _2, v16i8 _3) { + return __lsx_vmsub_b(_1, _2, _3); + } + // CHECK-LABEL: @vmsub_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmsub.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*, <8 x i16> _3:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP3:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmsub.h(<8 x i16> TMP0, <8 x i16> TMP1, <8 x i16> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <8 x i16> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v8i16 vmsub_h(v8i16 _1, v8i16 _2, v8i16 _3) { + return __lsx_vmsub_h(_1, _2, _3); + } + // CHECK-LABEL: @vmsub_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmsub.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*, <4 x i32> _3:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP3:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmsub.w(<4 x i32> TMP0, <4 x i32> TMP1, <4 x i32> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <4 x i32> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v4i32 vmsub_w(v4i32 _1, v4i32 _2, v4i32 _3) { + return __lsx_vmsub_w(_1, _2, _3); + } + // CHECK-LABEL: @vmsub_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmsub.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*, <2 x i64> _3:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP3:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmsub.d(<2 x i64> TMP0, <2 x i64> TMP1, <2 x i64> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <2 x i64> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v2i64 vmsub_d(v2i64 _1, v2i64 _2, v2i64 _3) { + return __lsx_vmsub_d(_1, _2, _3); + } + // CHECK-LABEL: @vdiv_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> 
@llvm.loongarch.lsx.vdiv.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vdiv.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vdiv_b(v16i8 _1, v16i8 _2) { return __lsx_vdiv_b(_1, _2); } + // CHECK-LABEL: @vdiv_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vdiv.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vdiv.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vdiv_h(v8i16 _1, v8i16 _2) { return __lsx_vdiv_h(_1, _2); } + // CHECK-LABEL: @vdiv_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vdiv.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vdiv.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vdiv_w(v4i32 _1, v4i32 _2) { return __lsx_vdiv_w(_1, _2); } + // CHECK-LABEL: @vdiv_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vdiv.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vdiv.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vdiv_d(v2i64 _1, v2i64 _2) { return __lsx_vdiv_d(_1, _2); } + // CHECK-LABEL: @vdiv_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vdiv.bu(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vdiv.bu(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16u8 vdiv_bu(v16u8 _1, v16u8 _2) { return __lsx_vdiv_bu(_1, _2); } + // CHECK-LABEL: @vdiv_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vdiv.hu(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vdiv.hu(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8u16 vdiv_hu(v8u16 _1, v8u16 _2) { return __lsx_vdiv_hu(_1, _2); } + // CHECK-LABEL: 
@vdiv_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vdiv.wu(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vdiv.wu(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4u32 vdiv_wu(v4u32 _1, v4u32 _2) { return __lsx_vdiv_wu(_1, _2); } + // CHECK-LABEL: @vdiv_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vdiv.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vdiv.du(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2u64 vdiv_du(v2u64 _1, v2u64 _2) { return __lsx_vdiv_du(_1, _2); } + // CHECK-LABEL: @vhaddw_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vhaddw.h.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vhaddw.h.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vhaddw_h_b(v16i8 _1, v16i8 _2) { return __lsx_vhaddw_h_b(_1, _2); } + // CHECK-LABEL: @vhaddw_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vhaddw.w.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vhaddw.w.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vhaddw_w_h(v8i16 _1, v8i16 _2) { return __lsx_vhaddw_w_h(_1, _2); } + // CHECK-LABEL: @vhaddw_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.d.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.d.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vhaddw_d_w(v4i32 _1, v4i32 _2) { return __lsx_vhaddw_d_w(_1, _2); } + // CHECK-LABEL: @vhaddw_hu_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vhaddw.hu.bu(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vhaddw.hu.bu(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: 
TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8u16 vhaddw_hu_bu(v16u8 _1, v16u8 _2) { return __lsx_vhaddw_hu_bu(_1, _2); } + // CHECK-LABEL: @vhaddw_wu_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vhaddw.wu.hu(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vhaddw.wu.hu(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4u32 vhaddw_wu_hu(v8u16 _1, v8u16 _2) { return __lsx_vhaddw_wu_hu(_1, _2); } + // CHECK-LABEL: @vhaddw_du_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.du.wu(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.du.wu(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2u64 vhaddw_du_wu(v4u32 _1, v4u32 _2) { return __lsx_vhaddw_du_wu(_1, _2); } + // CHECK-LABEL: @vhsubw_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vhsubw.h.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vhsubw.h.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vhsubw_h_b(v16i8 _1, v16i8 _2) { return __lsx_vhsubw_h_b(_1, _2); } + // CHECK-LABEL: @vhsubw_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vhsubw.w.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vhsubw.w.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vhsubw_w_h(v8i16 _1, v8i16 _2) { return __lsx_vhsubw_w_h(_1, _2); } + // CHECK-LABEL: @vhsubw_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.d.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.d.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vhsubw_d_w(v4i32 _1, v4i32 _2) { return __lsx_vhsubw_d_w(_1, _2); } + // CHECK-LABEL: @vhsubw_hu_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vhsubw.hu.bu(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 
_1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vhsubw.hu.bu(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vhsubw_hu_bu(v16u8 _1, v16u8 _2) { return __lsx_vhsubw_hu_bu(_1, _2); } + // CHECK-LABEL: @vhsubw_wu_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vhsubw.wu.hu(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vhsubw.wu.hu(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vhsubw_wu_hu(v8u16 _1, v8u16 _2) { return __lsx_vhsubw_wu_hu(_1, _2); } + // CHECK-LABEL: @vhsubw_du_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.du.wu(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.du.wu(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vhsubw_du_wu(v4u32 _1, v4u32 _2) { return __lsx_vhsubw_du_wu(_1, _2); } + // CHECK-LABEL: @vmod_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmod.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmod.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vmod_b(v16i8 _1, v16i8 _2) { return __lsx_vmod_b(_1, _2); } + // CHECK-LABEL: @vmod_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmod.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmod.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vmod_h(v8i16 _1, v8i16 _2) { return __lsx_vmod_h(_1, _2); } + // CHECK-LABEL: @vmod_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmod.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmod.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vmod_w(v4i32 _1, v4i32 _2) { return __lsx_vmod_w(_1, _2); } + // CHECK-LABEL: @vmod_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 
x i64> @llvm.loongarch.lsx.vmod.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmod.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vmod_d(v2i64 _1, v2i64 _2) { return __lsx_vmod_d(_1, _2); } + // CHECK-LABEL: @vmod_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmod.bu(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmod.bu(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16u8 vmod_bu(v16u8 _1, v16u8 _2) { return __lsx_vmod_bu(_1, _2); } + // CHECK-LABEL: @vmod_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmod.hu(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmod.hu(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8u16 vmod_hu(v8u16 _1, v8u16 _2) { return __lsx_vmod_hu(_1, _2); } + // CHECK-LABEL: @vmod_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmod.wu(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmod.wu(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4u32 vmod_wu(v4u32 _1, v4u32 _2) { return __lsx_vmod_wu(_1, _2); } + // CHECK-LABEL: @vmod_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmod.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmod.du(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2u64 vmod_du(v2u64 _1, v2u64 _2) { return __lsx_vmod_du(_1, _2); } + // CHECK-LABEL: @vreplve_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vreplve.b(<16 x i8> _1:%.*, i32 _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vreplve.b(<16 x i8> TMP0, i32 _2:%.*) ++// CHECK-NEXT: TMP2:%.* = bitcast <16 x i8> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v16i8 vreplve_b(v16i8 _1, int _2) { return __lsx_vreplve_b(_1, _2); } + // CHECK-LABEL: @vreplve_h( + // CHECK-NEXT: entry: +-// 
CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vreplve.h(<8 x i16> _1:%.*, i32 _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vreplve.h(<8 x i16> TMP0, i32 _2:%.*) ++// CHECK-NEXT: TMP2:%.* = bitcast <8 x i16> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v8i16 vreplve_h(v8i16 _1, int _2) { return __lsx_vreplve_h(_1, _2); } + // CHECK-LABEL: @vreplve_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vreplve.w(<4 x i32> _1:%.*, i32 _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vreplve.w(<4 x i32> TMP0, i32 _2:%.*) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x i32> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4i32 vreplve_w(v4i32 _1, int _2) { return __lsx_vreplve_w(_1, _2); } + // CHECK-LABEL: @vreplve_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vreplve.d(<2 x i64> _1:%.*, i32 _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vreplve.d(<2 x i64> TMP0, i32 _2:%.*) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vreplve_d(v2i64 _1, int _2) { return __lsx_vreplve_d(_1, _2); } + // CHECK-LABEL: @vreplvei_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vreplvei.b(<16 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vreplvei.b(<16 x i8> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <16 x i8> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v16i8 vreplvei_b(v16i8 _1) { return __lsx_vreplvei_b(_1, 1); } + // CHECK-LABEL: @vreplvei_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vreplvei.h(<8 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vreplvei.h(<8 x i16> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <8 x i16> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v8i16 vreplvei_h(v8i16 _1) { return __lsx_vreplvei_h(_1, 1); } + // CHECK-LABEL: @vreplvei_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vreplvei.w(<4 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vreplvei.w(<4 x i32> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x i32> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4i32 vreplvei_w(v4i32 _1) { return __lsx_vreplvei_w(_1, 1); } + // CHECK-LABEL: @vreplvei_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vreplvei.d(<2 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vreplvei.d(<2 x i64> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to 
i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vreplvei_d(v2i64 _1) { return __lsx_vreplvei_d(_1, 1); } + // CHECK-LABEL: @vpickev_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vpickev.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vpickev.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vpickev_b(v16i8 _1, v16i8 _2) { return __lsx_vpickev_b(_1, _2); } + // CHECK-LABEL: @vpickev_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vpickev.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vpickev.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vpickev_h(v8i16 _1, v8i16 _2) { return __lsx_vpickev_h(_1, _2); } + // CHECK-LABEL: @vpickev_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vpickev.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vpickev.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vpickev_w(v4i32 _1, v4i32 _2) { return __lsx_vpickev_w(_1, _2); } + // CHECK-LABEL: @vpickev_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vpickev.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vpickev.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vpickev_d(v2i64 _1, v2i64 _2) { return __lsx_vpickev_d(_1, _2); } + // CHECK-LABEL: @vpickod_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vpickod.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vpickod.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vpickod_b(v16i8 _1, v16i8 _2) { return __lsx_vpickod_b(_1, _2); } + // CHECK-LABEL: @vpickod_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vpickod.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// 
CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vpickod.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vpickod_h(v8i16 _1, v8i16 _2) { return __lsx_vpickod_h(_1, _2); } + // CHECK-LABEL: @vpickod_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vpickod.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vpickod.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vpickod_w(v4i32 _1, v4i32 _2) { return __lsx_vpickod_w(_1, _2); } + // CHECK-LABEL: @vpickod_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vpickod.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vpickod.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vpickod_d(v2i64 _1, v2i64 _2) { return __lsx_vpickod_d(_1, _2); } + // CHECK-LABEL: @vilvh_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vilvh.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vilvh.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vilvh_b(v16i8 _1, v16i8 _2) { return __lsx_vilvh_b(_1, _2); } + // CHECK-LABEL: @vilvh_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vilvh.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vilvh.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vilvh_h(v8i16 _1, v8i16 _2) { return __lsx_vilvh_h(_1, _2); } + // CHECK-LABEL: @vilvh_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vilvh.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vilvh.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vilvh_w(v4i32 _1, v4i32 _2) { return __lsx_vilvh_w(_1, _2); } + // CHECK-LABEL: @vilvh_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vilvh.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// 
CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vilvh.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vilvh_d(v2i64 _1, v2i64 _2) { return __lsx_vilvh_d(_1, _2); } + // CHECK-LABEL: @vilvl_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vilvl.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vilvl.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vilvl_b(v16i8 _1, v16i8 _2) { return __lsx_vilvl_b(_1, _2); } + // CHECK-LABEL: @vilvl_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vilvl.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vilvl.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vilvl_h(v8i16 _1, v8i16 _2) { return __lsx_vilvl_h(_1, _2); } + // CHECK-LABEL: @vilvl_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vilvl.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vilvl.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vilvl_w(v4i32 _1, v4i32 _2) { return __lsx_vilvl_w(_1, _2); } + // CHECK-LABEL: @vilvl_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vilvl.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vilvl.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vilvl_d(v2i64 _1, v2i64 _2) { return __lsx_vilvl_d(_1, _2); } + // CHECK-LABEL: @vpackev_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vpackev.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vpackev.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vpackev_b(v16i8 _1, v16i8 _2) { return __lsx_vpackev_b(_1, _2); } + // CHECK-LABEL: @vpackev_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 
x i16> @llvm.loongarch.lsx.vpackev.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vpackev.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vpackev_h(v8i16 _1, v8i16 _2) { return __lsx_vpackev_h(_1, _2); } + // CHECK-LABEL: @vpackev_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vpackev.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vpackev.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vpackev_w(v4i32 _1, v4i32 _2) { return __lsx_vpackev_w(_1, _2); } + // CHECK-LABEL: @vpackev_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vpackev.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vpackev.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vpackev_d(v2i64 _1, v2i64 _2) { return __lsx_vpackev_d(_1, _2); } + // CHECK-LABEL: @vpackod_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vpackod.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vpackod.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vpackod_b(v16i8 _1, v16i8 _2) { return __lsx_vpackod_b(_1, _2); } + // CHECK-LABEL: @vpackod_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vpackod.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vpackod.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vpackod_h(v8i16 _1, v8i16 _2) { return __lsx_vpackod_h(_1, _2); } + // CHECK-LABEL: @vpackod_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vpackod.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vpackod.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 
vpackod_w(v4i32 _1, v4i32 _2) { return __lsx_vpackod_w(_1, _2); } + // CHECK-LABEL: @vpackod_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vpackod.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vpackod.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vpackod_d(v2i64 _1, v2i64 _2) { return __lsx_vpackod_d(_1, _2); } + // CHECK-LABEL: @vshuf_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vshuf.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*, <8 x i16> _3:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP3:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vshuf.h(<8 x i16> TMP0, <8 x i16> TMP1, <8 x i16> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <8 x i16> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v8i16 vshuf_h(v8i16 _1, v8i16 _2, v8i16 _3) { + return __lsx_vshuf_h(_1, _2, _3); + } + // CHECK-LABEL: @vshuf_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vshuf.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*, <4 x i32> _3:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP3:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vshuf.w(<4 x i32> TMP0, <4 x i32> TMP1, <4 x i32> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <4 x i32> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v4i32 vshuf_w(v4i32 _1, v4i32 _2, v4i32 _3) { + return __lsx_vshuf_w(_1, _2, _3); + } + // CHECK-LABEL: @vshuf_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vshuf.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*, <2 x i64> _3:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP3:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vshuf.d(<2 x i64> TMP0, <2 x i64> TMP1, <2 x i64> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <2 x i64> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v2i64 vshuf_d(v2i64 _1, v2i64 _2, v2i64 _3) { + return __lsx_vshuf_d(_1, _2, _3); + } + // CHECK-LABEL: @vand_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vand.v(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vand.v(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16u8 vand_v(v16u8 _1, v16u8 _2) { return __lsx_vand_v(_1, _2); } + // CHECK-LABEL: @vandi_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* 
= tail call <16 x i8> @llvm.loongarch.lsx.vandi.b(<16 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vandi.b(<16 x i8> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <16 x i8> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v16u8 vandi_b(v16u8 _1) { return __lsx_vandi_b(_1, 1); } + // CHECK-LABEL: @vor_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vor.v(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vor.v(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16u8 vor_v(v16u8 _1, v16u8 _2) { return __lsx_vor_v(_1, _2); } + // CHECK-LABEL: @vori_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vori.b(<16 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vori.b(<16 x i8> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <16 x i8> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v16u8 vori_b(v16u8 _1) { return __lsx_vori_b(_1, 1); } + // CHECK-LABEL: @vnor_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vnor.v(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vnor.v(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16u8 vnor_v(v16u8 _1, v16u8 _2) { return __lsx_vnor_v(_1, _2); } + // CHECK-LABEL: @vnori_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vnori.b(<16 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vnori.b(<16 x i8> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <16 x i8> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v16u8 vnori_b(v16u8 _1) { return __lsx_vnori_b(_1, 1); } + // CHECK-LABEL: @vxor_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vxor.v(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vxor.v(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16u8 vxor_v(v16u8 _1, v16u8 _2) { return __lsx_vxor_v(_1, _2); } + // CHECK-LABEL: @vxori_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vxori.b(<16 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call <16 x 
i8> @llvm.loongarch.lsx.vxori.b(<16 x i8> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <16 x i8> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v16u8 vxori_b(v16u8 _1) { return __lsx_vxori_b(_1, 1); } + // CHECK-LABEL: @vbitsel_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vbitsel.v(<16 x i8> _1:%.*, <16 x i8> _2:%.*, <16 x i8> _3:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP3:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vbitsel.v(<16 x i8> TMP0, <16 x i8> TMP1, <16 x i8> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <16 x i8> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v16u8 vbitsel_v(v16u8 _1, v16u8 _2, v16u8 _3) { + return __lsx_vbitsel_v(_1, _2, _3); + } + // CHECK-LABEL: @vbitseli_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vbitseli.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vbitseli.b(<16 x i8> TMP0, <16 x i8> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16u8 vbitseli_b(v16u8 _1, v16u8 _2) { return __lsx_vbitseli_b(_1, _2, 1); } + // CHECK-LABEL: @vshuf4i_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vshuf4i.b(<16 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vshuf4i.b(<16 x i8> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <16 x i8> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v16i8 vshuf4i_b(v16i8 _1) { return __lsx_vshuf4i_b(_1, 1); } + // CHECK-LABEL: @vshuf4i_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vshuf4i.h(<8 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vshuf4i.h(<8 x i16> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <8 x i16> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v8i16 vshuf4i_h(v8i16 _1) { return __lsx_vshuf4i_h(_1, 1); } + // CHECK-LABEL: @vshuf4i_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vshuf4i.w(<4 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vshuf4i.w(<4 x i32> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x i32> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4i32 vshuf4i_w(v4i32 _1) { return __lsx_vshuf4i_w(_1, 1); } + // CHECK-LABEL: @vreplgr2vr_b( + // CHECK-NEXT: entry: + // CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vreplgr2vr.b(i32 _1:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP1:%.* = bitcast <16 x i8> TMP0 to i128 ++// CHECK-NEXT: ret i128 TMP1 + // + v16i8 vreplgr2vr_b(int _1) { return __lsx_vreplgr2vr_b(_1); } + // CHECK-LABEL: @vreplgr2vr_h( + // CHECK-NEXT: entry: + 
// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vreplgr2vr.h(i32 _1:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP1:%.* = bitcast <8 x i16> TMP0 to i128 ++// CHECK-NEXT: ret i128 TMP1 + // + v8i16 vreplgr2vr_h(int _1) { return __lsx_vreplgr2vr_h(_1); } + // CHECK-LABEL: @vreplgr2vr_w( + // CHECK-NEXT: entry: + // CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vreplgr2vr.w(i32 _1:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP1:%.* = bitcast <4 x i32> TMP0 to i128 ++// CHECK-NEXT: ret i128 TMP1 + // + v4i32 vreplgr2vr_w(int _1) { return __lsx_vreplgr2vr_w(_1); } + // CHECK-LABEL: @vreplgr2vr_d( + // CHECK-NEXT: entry: + // CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vreplgr2vr.d(i64 _1:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP1:%.* = bitcast <2 x i64> TMP0 to i128 ++// CHECK-NEXT: ret i128 TMP1 + // + v2i64 vreplgr2vr_d(long _1) { return __lsx_vreplgr2vr_d(_1); } + // CHECK-LABEL: @vpcnt_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vpcnt.b(<16 x i8> _1:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vpcnt.b(<16 x i8> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <16 x i8> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v16i8 vpcnt_b(v16i8 _1) { return __lsx_vpcnt_b(_1); } + // CHECK-LABEL: @vpcnt_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vpcnt.h(<8 x i16> _1:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vpcnt.h(<8 x i16> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <8 x i16> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v8i16 vpcnt_h(v8i16 _1) { return __lsx_vpcnt_h(_1); } + // CHECK-LABEL: @vpcnt_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vpcnt.w(<4 x i32> _1:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vpcnt.w(<4 x i32> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x i32> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4i32 vpcnt_w(v4i32 _1) { return __lsx_vpcnt_w(_1); } + // CHECK-LABEL: @vpcnt_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vpcnt.d(<2 x i64> _1:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vpcnt.d(<2 x i64> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vpcnt_d(v2i64 _1) { return __lsx_vpcnt_d(_1); } + // CHECK-LABEL: @vclo_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vclo.b(<16 x i8> _1:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vclo.b(<16 x i8> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <16 x i8> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v16i8 vclo_b(v16i8 _1) { return __lsx_vclo_b(_1); } + // CHECK-LABEL: @vclo_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> 
@llvm.loongarch.lsx.vclo.h(<8 x i16> _1:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vclo.h(<8 x i16> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <8 x i16> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v8i16 vclo_h(v8i16 _1) { return __lsx_vclo_h(_1); } + // CHECK-LABEL: @vclo_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vclo.w(<4 x i32> _1:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vclo.w(<4 x i32> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x i32> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4i32 vclo_w(v4i32 _1) { return __lsx_vclo_w(_1); } + // CHECK-LABEL: @vclo_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vclo.d(<2 x i64> _1:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vclo.d(<2 x i64> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vclo_d(v2i64 _1) { return __lsx_vclo_d(_1); } + // CHECK-LABEL: @vclz_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vclz.b(<16 x i8> _1:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vclz.b(<16 x i8> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <16 x i8> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v16i8 vclz_b(v16i8 _1) { return __lsx_vclz_b(_1); } + // CHECK-LABEL: @vclz_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vclz.h(<8 x i16> _1:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vclz.h(<8 x i16> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <8 x i16> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v8i16 vclz_h(v8i16 _1) { return __lsx_vclz_h(_1); } + // CHECK-LABEL: @vclz_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vclz.w(<4 x i32> _1:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vclz.w(<4 x i32> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x i32> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4i32 vclz_w(v4i32 _1) { return __lsx_vclz_w(_1); } + // CHECK-LABEL: @vclz_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vclz.d(<2 x i64> _1:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vclz.d(<2 x i64> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vclz_d(v2i64 _1) { return __lsx_vclz_d(_1); } + // CHECK-LABEL: @vpickve2gr_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret i32 TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 
_1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8> TMP0, i32 1) ++// CHECK-NEXT: ret i32 TMP1 + // + int vpickve2gr_b(v16i8 _1) { return __lsx_vpickve2gr_b(_1, 1); } + // CHECK-LABEL: @vpickve2gr_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lsx.vpickve2gr.h(<8 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret i32 TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = tail call i32 @llvm.loongarch.lsx.vpickve2gr.h(<8 x i16> TMP0, i32 1) ++// CHECK-NEXT: ret i32 TMP1 + // + int vpickve2gr_h(v8i16 _1) { return __lsx_vpickve2gr_h(_1, 1); } + // CHECK-LABEL: @vpickve2gr_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lsx.vpickve2gr.w(<4 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret i32 TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = tail call i32 @llvm.loongarch.lsx.vpickve2gr.w(<4 x i32> TMP0, i32 1) ++// CHECK-NEXT: ret i32 TMP1 + // + int vpickve2gr_w(v4i32 _1) { return __lsx_vpickve2gr_w(_1, 1); } + // CHECK-LABEL: @vpickve2gr_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret i64 TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = tail call i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64> TMP0, i32 1) ++// CHECK-NEXT: ret i64 TMP1 + // + long vpickve2gr_d(v2i64 _1) { return __lsx_vpickve2gr_d(_1, 1); } + // CHECK-LABEL: @vpickve2gr_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lsx.vpickve2gr.bu(<16 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret i32 TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call i32 @llvm.loongarch.lsx.vpickve2gr.bu(<16 x i8> TMP0, i32 1) ++// CHECK-NEXT: ret i32 TMP1 + // + unsigned int vpickve2gr_bu(v16i8 _1) { return __lsx_vpickve2gr_bu(_1, 1); } + // CHECK-LABEL: @vpickve2gr_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lsx.vpickve2gr.hu(<8 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret i32 TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = tail call i32 @llvm.loongarch.lsx.vpickve2gr.hu(<8 x i16> TMP0, i32 1) ++// CHECK-NEXT: ret i32 TMP1 + // + unsigned int vpickve2gr_hu(v8i16 _1) { return __lsx_vpickve2gr_hu(_1, 1); } + // CHECK-LABEL: @vpickve2gr_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lsx.vpickve2gr.wu(<4 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret i32 TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = tail call i32 @llvm.loongarch.lsx.vpickve2gr.wu(<4 x i32> TMP0, i32 1) ++// CHECK-NEXT: ret i32 TMP1 + // + unsigned int vpickve2gr_wu(v4i32 _1) { return __lsx_vpickve2gr_wu(_1, 1); } + // CHECK-LABEL: @vpickve2gr_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret i64 TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = tail call i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64> TMP0, i32 1) ++// CHECK-NEXT: ret i64 TMP1 + // + unsigned long int vpickve2gr_du(v2i64 _1) { return __lsx_vpickve2gr_du(_1, 1); } + // CHECK-LABEL: @vinsgr2vr_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: 
TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b(<16 x i8> _1:%.*, i32 1, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b(<16 x i8> TMP0, i32 1, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <16 x i8> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v16i8 vinsgr2vr_b(v16i8 _1) { return __lsx_vinsgr2vr_b(_1, 1, 1); } + // CHECK-LABEL: @vinsgr2vr_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vinsgr2vr.h(<8 x i16> _1:%.*, i32 1, i32 1) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vinsgr2vr.h(<8 x i16> TMP0, i32 1, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <8 x i16> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v8i16 vinsgr2vr_h(v8i16 _1) { return __lsx_vinsgr2vr_h(_1, 1, 1); } + // CHECK-LABEL: @vinsgr2vr_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vinsgr2vr.w(<4 x i32> _1:%.*, i32 1, i32 1) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vinsgr2vr.w(<4 x i32> TMP0, i32 1, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x i32> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4i32 vinsgr2vr_w(v4i32 _1) { return __lsx_vinsgr2vr_w(_1, 1, 1); } + // CHECK-LABEL: @vinsgr2vr_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d(<2 x i64> _1:%.*, i64 1, i32 1) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d(<2 x i64> TMP0, i64 1, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vinsgr2vr_d(v2i64 _1) { return __lsx_vinsgr2vr_d(_1, 1, 1); } + // CHECK-LABEL: @vfadd_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfadd.s(<4 x float> _1:%.*, <4 x float> _2:%.*) +-// CHECK-NEXT: ret <4 x float> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfadd.s(<4 x float> TMP0, <4 x float> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x float> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4f32 vfadd_s(v4f32 _1, v4f32 _2) { return __lsx_vfadd_s(_1, _2); } + // CHECK-LABEL: @vfadd_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfadd.d(<2 x double> _1:%.*, <2 x double> _2:%.*) +-// CHECK-NEXT: ret <2 x double> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfadd.d(<2 x double> TMP0, <2 x double> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x double> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2f64 vfadd_d(v2f64 _1, v2f64 _2) { return __lsx_vfadd_d(_1, _2); } + // CHECK-LABEL: @vfsub_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfsub.s(<4 x float> _1:%.*, <4 x float> _2:%.*) 
+-// CHECK-NEXT: ret <4 x float> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfsub.s(<4 x float> TMP0, <4 x float> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x float> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4f32 vfsub_s(v4f32 _1, v4f32 _2) { return __lsx_vfsub_s(_1, _2); } + // CHECK-LABEL: @vfsub_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfsub.d(<2 x double> _1:%.*, <2 x double> _2:%.*) +-// CHECK-NEXT: ret <2 x double> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfsub.d(<2 x double> TMP0, <2 x double> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x double> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2f64 vfsub_d(v2f64 _1, v2f64 _2) { return __lsx_vfsub_d(_1, _2); } + // CHECK-LABEL: @vfmul_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfmul.s(<4 x float> _1:%.*, <4 x float> _2:%.*) +-// CHECK-NEXT: ret <4 x float> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfmul.s(<4 x float> TMP0, <4 x float> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x float> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4f32 vfmul_s(v4f32 _1, v4f32 _2) { return __lsx_vfmul_s(_1, _2); } + // CHECK-LABEL: @vfmul_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfmul.d(<2 x double> _1:%.*, <2 x double> _2:%.*) +-// CHECK-NEXT: ret <2 x double> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfmul.d(<2 x double> TMP0, <2 x double> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x double> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2f64 vfmul_d(v2f64 _1, v2f64 _2) { return __lsx_vfmul_d(_1, _2); } + // CHECK-LABEL: @vfdiv_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfdiv.s(<4 x float> _1:%.*, <4 x float> _2:%.*) +-// CHECK-NEXT: ret <4 x float> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfdiv.s(<4 x float> TMP0, <4 x float> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x float> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4f32 vfdiv_s(v4f32 _1, v4f32 _2) { return __lsx_vfdiv_s(_1, _2); } + // CHECK-LABEL: @vfdiv_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfdiv.d(<2 x double> _1:%.*, <2 x double> _2:%.*) +-// CHECK-NEXT: ret <2 x double> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfdiv.d(<2 x double> TMP0, <2 x double> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x double> TMP2 to i128 ++// CHECK-NEXT: ret i128 
TMP3 + // + v2f64 vfdiv_d(v2f64 _1, v2f64 _2) { return __lsx_vfdiv_d(_1, _2); } + // CHECK-LABEL: @vfcvt_h_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vfcvt.h.s(<4 x float> _1:%.*, <4 x float> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vfcvt.h.s(<4 x float> TMP0, <4 x float> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vfcvt_h_s(v4f32 _1, v4f32 _2) { return __lsx_vfcvt_h_s(_1, _2); } + // CHECK-LABEL: @vfcvt_s_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfcvt.s.d(<2 x double> _1:%.*, <2 x double> _2:%.*) +-// CHECK-NEXT: ret <4 x float> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfcvt.s.d(<2 x double> TMP0, <2 x double> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x float> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4f32 vfcvt_s_d(v2f64 _1, v2f64 _2) { return __lsx_vfcvt_s_d(_1, _2); } + // CHECK-LABEL: @vfmin_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfmin.s(<4 x float> _1:%.*, <4 x float> _2:%.*) +-// CHECK-NEXT: ret <4 x float> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfmin.s(<4 x float> TMP0, <4 x float> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x float> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4f32 vfmin_s(v4f32 _1, v4f32 _2) { return __lsx_vfmin_s(_1, _2); } + // CHECK-LABEL: @vfmin_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfmin.d(<2 x double> _1:%.*, <2 x double> _2:%.*) +-// CHECK-NEXT: ret <2 x double> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfmin.d(<2 x double> TMP0, <2 x double> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x double> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2f64 vfmin_d(v2f64 _1, v2f64 _2) { return __lsx_vfmin_d(_1, _2); } + // CHECK-LABEL: @vfmina_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfmina.s(<4 x float> _1:%.*, <4 x float> _2:%.*) +-// CHECK-NEXT: ret <4 x float> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfmina.s(<4 x float> TMP0, <4 x float> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x float> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4f32 vfmina_s(v4f32 _1, v4f32 _2) { return __lsx_vfmina_s(_1, _2); } + // CHECK-LABEL: @vfmina_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfmina.d(<2 x double> _1:%.*, <2 x double> _2:%.*) +-// CHECK-NEXT: ret <2 x double> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// 
CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfmina.d(<2 x double> TMP0, <2 x double> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x double> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2f64 vfmina_d(v2f64 _1, v2f64 _2) { return __lsx_vfmina_d(_1, _2); } + // CHECK-LABEL: @vfmax_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfmax.s(<4 x float> _1:%.*, <4 x float> _2:%.*) +-// CHECK-NEXT: ret <4 x float> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfmax.s(<4 x float> TMP0, <4 x float> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x float> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4f32 vfmax_s(v4f32 _1, v4f32 _2) { return __lsx_vfmax_s(_1, _2); } + // CHECK-LABEL: @vfmax_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfmax.d(<2 x double> _1:%.*, <2 x double> _2:%.*) +-// CHECK-NEXT: ret <2 x double> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfmax.d(<2 x double> TMP0, <2 x double> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x double> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2f64 vfmax_d(v2f64 _1, v2f64 _2) { return __lsx_vfmax_d(_1, _2); } + // CHECK-LABEL: @vfmaxa_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfmaxa.s(<4 x float> _1:%.*, <4 x float> _2:%.*) +-// CHECK-NEXT: ret <4 x float> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfmaxa.s(<4 x float> TMP0, <4 x float> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x float> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4f32 vfmaxa_s(v4f32 _1, v4f32 _2) { return __lsx_vfmaxa_s(_1, _2); } + // CHECK-LABEL: @vfmaxa_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfmaxa.d(<2 x double> _1:%.*, <2 x double> _2:%.*) +-// CHECK-NEXT: ret <2 x double> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfmaxa.d(<2 x double> TMP0, <2 x double> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x double> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2f64 vfmaxa_d(v2f64 _1, v2f64 _2) { return __lsx_vfmaxa_d(_1, _2); } + // CHECK-LABEL: @vfclass_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfclass.s(<4 x float> _1:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfclass.s(<4 x float> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x i32> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4i32 vfclass_s(v4f32 _1) { return __lsx_vfclass_s(_1); } + // CHECK-LABEL: @vfclass_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfclass.d(<2 x double> 
_1:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfclass.d(<2 x double> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vfclass_d(v2f64 _1) { return __lsx_vfclass_d(_1); } + // CHECK-LABEL: @vfsqrt_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfsqrt.s(<4 x float> _1:%.*) +-// CHECK-NEXT: ret <4 x float> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfsqrt.s(<4 x float> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x float> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4f32 vfsqrt_s(v4f32 _1) { return __lsx_vfsqrt_s(_1); } + // CHECK-LABEL: @vfsqrt_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfsqrt.d(<2 x double> _1:%.*) +-// CHECK-NEXT: ret <2 x double> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfsqrt.d(<2 x double> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x double> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2f64 vfsqrt_d(v2f64 _1) { return __lsx_vfsqrt_d(_1); } + // CHECK-LABEL: @vfrecip_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfrecip.s(<4 x float> _1:%.*) +-// CHECK-NEXT: ret <4 x float> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfrecip.s(<4 x float> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x float> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4f32 vfrecip_s(v4f32 _1) { return __lsx_vfrecip_s(_1); } + // CHECK-LABEL: @vfrecip_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfrecip.d(<2 x double> _1:%.*) +-// CHECK-NEXT: ret <2 x double> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfrecip.d(<2 x double> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x double> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2f64 vfrecip_d(v2f64 _1) { return __lsx_vfrecip_d(_1); } + // CHECK-LABEL: @vfrint_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfrint.s(<4 x float> _1:%.*) +-// CHECK-NEXT: ret <4 x float> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfrint.s(<4 x float> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x float> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4f32 vfrint_s(v4f32 _1) { return __lsx_vfrint_s(_1); } + // CHECK-LABEL: @vfrint_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfrint.d(<2 x double> _1:%.*) +-// CHECK-NEXT: ret <2 x double> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfrint.d(<2 x double> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x double> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2f64 vfrint_d(v2f64 _1) { return __lsx_vfrint_d(_1); } + // CHECK-LABEL: @vfrsqrt_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = 
tail call <4 x float> @llvm.loongarch.lsx.vfrsqrt.s(<4 x float> _1:%.*) +-// CHECK-NEXT: ret <4 x float> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfrsqrt.s(<4 x float> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x float> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4f32 vfrsqrt_s(v4f32 _1) { return __lsx_vfrsqrt_s(_1); } + // CHECK-LABEL: @vfrsqrt_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfrsqrt.d(<2 x double> _1:%.*) +-// CHECK-NEXT: ret <2 x double> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfrsqrt.d(<2 x double> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x double> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2f64 vfrsqrt_d(v2f64 _1) { return __lsx_vfrsqrt_d(_1); } + // CHECK-LABEL: @vflogb_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vflogb.s(<4 x float> _1:%.*) +-// CHECK-NEXT: ret <4 x float> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x float> @llvm.loongarch.lsx.vflogb.s(<4 x float> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x float> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4f32 vflogb_s(v4f32 _1) { return __lsx_vflogb_s(_1); } + // CHECK-LABEL: @vflogb_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vflogb.d(<2 x double> _1:%.*) +-// CHECK-NEXT: ret <2 x double> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x double> @llvm.loongarch.lsx.vflogb.d(<2 x double> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x double> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2f64 vflogb_d(v2f64 _1) { return __lsx_vflogb_d(_1); } + // CHECK-LABEL: @vfcvth_s_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfcvth.s.h(<8 x i16> _1:%.*) +-// CHECK-NEXT: ret <4 x float> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfcvth.s.h(<8 x i16> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x float> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4f32 vfcvth_s_h(v8i16 _1) { return __lsx_vfcvth_s_h(_1); } + // CHECK-LABEL: @vfcvth_d_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfcvth.d.s(<4 x float> _1:%.*) +-// CHECK-NEXT: ret <2 x double> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfcvth.d.s(<4 x float> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x double> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2f64 vfcvth_d_s(v4f32 _1) { return __lsx_vfcvth_d_s(_1); } + // CHECK-LABEL: @vfcvtl_s_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfcvtl.s.h(<8 x i16> _1:%.*) +-// CHECK-NEXT: ret <4 x float> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfcvtl.s.h(<8 x i16> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x float> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4f32 vfcvtl_s_h(v8i16 _1) { return __lsx_vfcvtl_s_h(_1); } + // 
CHECK-LABEL: @vfcvtl_d_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfcvtl.d.s(<4 x float> _1:%.*) +-// CHECK-NEXT: ret <2 x double> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfcvtl.d.s(<4 x float> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x double> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2f64 vfcvtl_d_s(v4f32 _1) { return __lsx_vfcvtl_d_s(_1); } + // CHECK-LABEL: @vftint_w_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vftint.w.s(<4 x float> _1:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vftint.w.s(<4 x float> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x i32> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4i32 vftint_w_s(v4f32 _1) { return __lsx_vftint_w_s(_1); } + // CHECK-LABEL: @vftint_l_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftint.l.d(<2 x double> _1:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftint.l.d(<2 x double> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vftint_l_d(v2f64 _1) { return __lsx_vftint_l_d(_1); } + // CHECK-LABEL: @vftint_wu_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vftint.wu.s(<4 x float> _1:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vftint.wu.s(<4 x float> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x i32> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4u32 vftint_wu_s(v4f32 _1) { return __lsx_vftint_wu_s(_1); } + // CHECK-LABEL: @vftint_lu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftint.lu.d(<2 x double> _1:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftint.lu.d(<2 x double> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2u64 vftint_lu_d(v2f64 _1) { return __lsx_vftint_lu_d(_1); } + // CHECK-LABEL: @vftintrz_w_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.s(<4 x float> _1:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.s(<4 x float> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x i32> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4i32 vftintrz_w_s(v4f32 _1) { return __lsx_vftintrz_w_s(_1); } + // CHECK-LABEL: @vftintrz_l_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftintrz.l.d(<2 x double> _1:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftintrz.l.d(<2 x double> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// 
CHECK-NEXT: ret i128 TMP2 + // + v2i64 vftintrz_l_d(v2f64 _1) { return __lsx_vftintrz_l_d(_1); } + // CHECK-LABEL: @vftintrz_wu_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.wu.s(<4 x float> _1:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.wu.s(<4 x float> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x i32> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4u32 vftintrz_wu_s(v4f32 _1) { return __lsx_vftintrz_wu_s(_1); } + // CHECK-LABEL: @vftintrz_lu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftintrz.lu.d(<2 x double> _1:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftintrz.lu.d(<2 x double> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2u64 vftintrz_lu_d(v2f64 _1) { return __lsx_vftintrz_lu_d(_1); } + // CHECK-LABEL: @vffint_s_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.w(<4 x i32> _1:%.*) +-// CHECK-NEXT: ret <4 x float> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.w(<4 x i32> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x float> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4f32 vffint_s_w(v4i32 _1) { return __lsx_vffint_s_w(_1); } + // CHECK-LABEL: @vffint_d_l( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vffint.d.l(<2 x i64> _1:%.*) +-// CHECK-NEXT: ret <2 x double> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x double> @llvm.loongarch.lsx.vffint.d.l(<2 x i64> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x double> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2f64 vffint_d_l(v2i64 _1) { return __lsx_vffint_d_l(_1); } + // CHECK-LABEL: @vffint_s_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.wu(<4 x i32> _1:%.*) +-// CHECK-NEXT: ret <4 x float> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.wu(<4 x i32> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x float> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4f32 vffint_s_wu(v4u32 _1) { return __lsx_vffint_s_wu(_1); } + // CHECK-LABEL: @vffint_d_lu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vffint.d.lu(<2 x i64> _1:%.*) +-// CHECK-NEXT: ret <2 x double> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x double> @llvm.loongarch.lsx.vffint.d.lu(<2 x i64> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x double> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2f64 vffint_d_lu(v2u64 _1) { return __lsx_vffint_d_lu(_1); } + // CHECK-LABEL: @vandn_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vandn.v(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 
_2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vandn.v(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16u8 vandn_v(v16u8 _1, v16u8 _2) { return __lsx_vandn_v(_1, _2); } + // CHECK-LABEL: @vneg_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vneg.b(<16 x i8> _1:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vneg.b(<16 x i8> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <16 x i8> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v16i8 vneg_b(v16i8 _1) { return __lsx_vneg_b(_1); } + // CHECK-LABEL: @vneg_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vneg.h(<8 x i16> _1:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vneg.h(<8 x i16> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <8 x i16> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v8i16 vneg_h(v8i16 _1) { return __lsx_vneg_h(_1); } + // CHECK-LABEL: @vneg_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vneg.w(<4 x i32> _1:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vneg.w(<4 x i32> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x i32> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4i32 vneg_w(v4i32 _1) { return __lsx_vneg_w(_1); } + // CHECK-LABEL: @vneg_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vneg.d(<2 x i64> _1:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vneg.d(<2 x i64> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vneg_d(v2i64 _1) { return __lsx_vneg_d(_1); } + // CHECK-LABEL: @vmuh_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmuh.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmuh.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vmuh_b(v16i8 _1, v16i8 _2) { return __lsx_vmuh_b(_1, _2); } + // CHECK-LABEL: @vmuh_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmuh.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmuh.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vmuh_h(v8i16 _1, v8i16 _2) { return __lsx_vmuh_h(_1, _2); } + // CHECK-LABEL: @vmuh_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x 
i32> @llvm.loongarch.lsx.vmuh.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmuh.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vmuh_w(v4i32 _1, v4i32 _2) { return __lsx_vmuh_w(_1, _2); } + // CHECK-LABEL: @vmuh_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmuh.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmuh.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vmuh_d(v2i64 _1, v2i64 _2) { return __lsx_vmuh_d(_1, _2); } + // CHECK-LABEL: @vmuh_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmuh.bu(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmuh.bu(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16u8 vmuh_bu(v16u8 _1, v16u8 _2) { return __lsx_vmuh_bu(_1, _2); } + // CHECK-LABEL: @vmuh_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmuh.hu(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmuh.hu(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8u16 vmuh_hu(v8u16 _1, v8u16 _2) { return __lsx_vmuh_hu(_1, _2); } + // CHECK-LABEL: @vmuh_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmuh.wu(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmuh.wu(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4u32 vmuh_wu(v4u32 _1, v4u32 _2) { return __lsx_vmuh_wu(_1, _2); } + // CHECK-LABEL: @vmuh_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmuh.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmuh.du(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2u64 vmuh_du(v2u64 _1, v2u64 _2) { return __lsx_vmuh_du(_1, _2); } + // 
CHECK-LABEL: @vsllwil_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b(<16 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b(<16 x i8> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <8 x i16> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v8i16 vsllwil_h_b(v16i8 _1) { return __lsx_vsllwil_h_b(_1, 1); } + // CHECK-LABEL: @vsllwil_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsllwil.w.h(<8 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsllwil.w.h(<8 x i16> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x i32> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4i32 vsllwil_w_h(v8i16 _1) { return __lsx_vsllwil_w_h(_1, 1); } + // CHECK-LABEL: @vsllwil_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsllwil.d.w(<4 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsllwil.d.w(<4 x i32> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vsllwil_d_w(v4i32 _1) { return __lsx_vsllwil_d_w(_1, 1); } + // CHECK-LABEL: @vsllwil_hu_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsllwil.hu.bu(<16 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsllwil.hu.bu(<16 x i8> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <8 x i16> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v8u16 vsllwil_hu_bu(v16u8 _1) { return __lsx_vsllwil_hu_bu(_1, 1); } + // CHECK-LABEL: @vsllwil_wu_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsllwil.wu.hu(<8 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsllwil.wu.hu(<8 x i16> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x i32> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4u32 vsllwil_wu_hu(v8u16 _1) { return __lsx_vsllwil_wu_hu(_1, 1); } + // CHECK-LABEL: @vsllwil_du_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsllwil.du.wu(<4 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsllwil.du.wu(<4 x i32> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2u64 vsllwil_du_wu(v4u32 _1) { return __lsx_vsllwil_du_wu(_1, 1); } + // CHECK-LABEL: @vsran_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsran.b.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// 
CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsran.b.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vsran_b_h(v8i16 _1, v8i16 _2) { return __lsx_vsran_b_h(_1, _2); } + // CHECK-LABEL: @vsran_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsran.h.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsran.h.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vsran_h_w(v4i32 _1, v4i32 _2) { return __lsx_vsran_h_w(_1, _2); } + // CHECK-LABEL: @vsran_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsran.w.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsran.w.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vsran_w_d(v2i64 _1, v2i64 _2) { return __lsx_vsran_w_d(_1, _2); } + // CHECK-LABEL: @vssran_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssran.b.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssran.b.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vssran_b_h(v8i16 _1, v8i16 _2) { return __lsx_vssran_b_h(_1, _2); } + // CHECK-LABEL: @vssran_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssran.h.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssran.h.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vssran_h_w(v4i32 _1, v4i32 _2) { return __lsx_vssran_h_w(_1, _2); } + // CHECK-LABEL: @vssran_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssran.w.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssran.w.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vssran_w_d(v2i64 _1, v2i64 _2) { return __lsx_vssran_w_d(_1, _2); } + // CHECK-LABEL: @vssran_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssran.bu.h(<8 x i16> _1:%.*, <8 x i16> 
_2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssran.bu.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16u8 vssran_bu_h(v8u16 _1, v8u16 _2) { return __lsx_vssran_bu_h(_1, _2); } + // CHECK-LABEL: @vssran_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssran.hu.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssran.hu.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8u16 vssran_hu_w(v4u32 _1, v4u32 _2) { return __lsx_vssran_hu_w(_1, _2); } + // CHECK-LABEL: @vssran_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssran.wu.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssran.wu.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4u32 vssran_wu_d(v2u64 _1, v2u64 _2) { return __lsx_vssran_wu_d(_1, _2); } + // CHECK-LABEL: @vsrarn_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsrarn.b.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsrarn.b.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vsrarn_b_h(v8i16 _1, v8i16 _2) { return __lsx_vsrarn_b_h(_1, _2); } + // CHECK-LABEL: @vsrarn_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsrarn.h.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsrarn.h.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vsrarn_h_w(v4i32 _1, v4i32 _2) { return __lsx_vsrarn_h_w(_1, _2); } + // CHECK-LABEL: @vsrarn_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsrarn.w.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsrarn.w.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vsrarn_w_d(v2i64 _1, v2i64 
_2) { return __lsx_vsrarn_w_d(_1, _2); } + // CHECK-LABEL: @vssrarn_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssrarn.b.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssrarn.b.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vssrarn_b_h(v8i16 _1, v8i16 _2) { return __lsx_vssrarn_b_h(_1, _2); } + // CHECK-LABEL: @vssrarn_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssrarn.h.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssrarn.h.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vssrarn_h_w(v4i32 _1, v4i32 _2) { return __lsx_vssrarn_h_w(_1, _2); } + // CHECK-LABEL: @vssrarn_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssrarn.w.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssrarn.w.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vssrarn_w_d(v2i64 _1, v2i64 _2) { return __lsx_vssrarn_w_d(_1, _2); } + // CHECK-LABEL: @vssrarn_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssrarn.bu.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssrarn.bu.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16u8 vssrarn_bu_h(v8u16 _1, v8u16 _2) { return __lsx_vssrarn_bu_h(_1, _2); } + // CHECK-LABEL: @vssrarn_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssrarn.hu.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssrarn.hu.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8u16 vssrarn_hu_w(v4u32 _1, v4u32 _2) { return __lsx_vssrarn_hu_w(_1, _2); } + // CHECK-LABEL: @vssrarn_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssrarn.wu.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// 
CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrarn.wu.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
++// CHECK-NEXT: ret i128 [[TMP3]]
+ //
+ v4u32 vssrarn_wu_d(v2u64 _1, v2u64 _2) { return __lsx_vssrarn_wu_d(_1, _2); }
+ // CHECK-LABEL: @vsrln_b_h(
+ // CHECK-NEXT: entry:
+-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrln.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+-// CHECK-NEXT: ret <16 x i8> [[TMP0]]
++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrln.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
++// CHECK-NEXT: ret i128 [[TMP3]]
+ //
+ v16i8 vsrln_b_h(v8i16 _1, v8i16 _2) { return __lsx_vsrln_b_h(_1, _2); }
+ // CHECK-LABEL: @vsrln_h_w(
+ // CHECK-NEXT: entry:
+-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrln.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+-// CHECK-NEXT: ret <8 x i16> [[TMP0]]
++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrln.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
++// CHECK-NEXT: ret i128 [[TMP3]]
+ //
+ v8i16 vsrln_h_w(v4i32 _1, v4i32 _2) { return __lsx_vsrln_h_w(_1, _2); }
+ // CHECK-LABEL: @vsrln_w_d(
+ // CHECK-NEXT: entry:
+-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrln.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+-// CHECK-NEXT: ret <4 x i32> [[TMP0]]
++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrln.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
++// CHECK-NEXT: ret i128 [[TMP3]]
+ //
+ v4i32 vsrln_w_d(v2i64 _1, v2i64 _2) { return __lsx_vsrln_w_d(_1, _2); }
+ // CHECK-LABEL: @vssrln_bu_h(
+ // CHECK-NEXT: entry:
+-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrln.bu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+-// CHECK-NEXT: ret <16 x i8> [[TMP0]]
++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrln.bu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
++// CHECK-NEXT: ret i128 [[TMP3]]
+ //
+ v16u8 vssrln_bu_h(v8u16 _1, v8u16 _2) { return __lsx_vssrln_bu_h(_1, _2); }
+ // CHECK-LABEL: @vssrln_hu_w(
+ // CHECK-NEXT: entry:
+-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrln.hu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+-// CHECK-NEXT: ret <8 x i16> [[TMP0]]
++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrln.hu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
++// CHECK-NEXT: ret i128 [[TMP3]]
+ //
+ v8u16 vssrln_hu_w(v4u32 _1, v4u32 _2) { return __lsx_vssrln_hu_w(_1, _2); }
+ // CHECK-LABEL: @vssrln_wu_d(
+ // CHECK-NEXT: entry:
+-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrln.wu.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+-// CHECK-NEXT: ret <4 x i32> [[TMP0]]
++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vssrln.wu.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
++// CHECK-NEXT: ret i128 [[TMP3]]
+ //
+ v4u32 vssrln_wu_d(v2u64 _1, v2u64 _2) { return __lsx_vssrln_wu_d(_1, _2); }
+ // CHECK-LABEL: @vsrlrn_b_h(
+ // CHECK-NEXT: entry:
+-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlrn.b.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+-// CHECK-NEXT: ret <16 x i8> [[TMP0]]
++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vsrlrn.b.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
++// CHECK-NEXT: ret i128 [[TMP3]]
+ //
+ v16i8 vsrlrn_b_h(v8i16 _1, v8i16 _2) { return __lsx_vsrlrn_b_h(_1, _2); }
+ // CHECK-LABEL: @vsrlrn_h_w(
+ // CHECK-NEXT: entry:
+-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlrn.h.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+-// CHECK-NEXT: ret <8 x i16> [[TMP0]]
++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vsrlrn.h.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
++// CHECK-NEXT: ret i128 [[TMP3]]
+ //
+ v8i16 vsrlrn_h_w(v4i32 _1, v4i32 _2) { return __lsx_vsrlrn_h_w(_1, _2); }
+ // CHECK-LABEL: @vsrlrn_w_d(
+ // CHECK-NEXT: entry:
+-// CHECK-NEXT: [[TMP0:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlrn.w.d(<2 x i64> [[_1:%.*]], <2 x i64> [[_2:%.*]])
+-// CHECK-NEXT: ret <4 x i32> [[TMP0]]
++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <2 x i64>
++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <2 x i64>
++// CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.loongarch.lsx.vsrlrn.w.d(<2 x i64> [[TMP0]], <2 x i64> [[TMP1]])
++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP2]] to i128
++// CHECK-NEXT: ret i128 [[TMP3]]
+ //
+ v4i32 vsrlrn_w_d(v2i64 _1, v2i64 _2) { return __lsx_vsrlrn_w_d(_1, _2); }
+ // CHECK-LABEL: @vssrlrn_bu_h(
+ // CHECK-NEXT: entry:
+-// CHECK-NEXT: [[TMP0:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrn.bu.h(<8 x i16> [[_1:%.*]], <8 x i16> [[_2:%.*]])
+-// CHECK-NEXT: ret <16 x i8> [[TMP0]]
++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <8 x i16>
++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <8 x i16>
++// CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrn.bu.h(<8 x i16> [[TMP0]], <8 x i16> [[TMP1]])
++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to i128
++// CHECK-NEXT: ret i128 [[TMP3]]
+ //
+ v16u8 vssrlrn_bu_h(v8u16 _1, v8u16 _2) { return __lsx_vssrlrn_bu_h(_1, _2); }
+ // CHECK-LABEL: @vssrlrn_hu_w(
+ // CHECK-NEXT: entry:
+-// CHECK-NEXT: [[TMP0:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrn.hu.w(<4 x i32> [[_1:%.*]], <4 x i32> [[_2:%.*]])
+-// CHECK-NEXT: ret <8 x i16> [[TMP0]]
++// CHECK-NEXT: [[TMP0:%.*]] = bitcast i128 [[_1_COERCE:%.*]] to <4 x i32>
++// CHECK-NEXT: [[TMP1:%.*]] = bitcast i128 [[_2_COERCE:%.*]] to <4 x i32>
++// CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrn.hu.w(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
++// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[TMP2]] to i128
++// CHECK-NEXT: ret i128 [[TMP3]]
+ //
+ v8u16
vssrlrn_hu_w(v4u32 _1, v4u32 _2) { return __lsx_vssrlrn_hu_w(_1, _2); } + // CHECK-LABEL: @vssrlrn_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrn.wu.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrn.wu.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4u32 vssrlrn_wu_d(v2u64 _1, v2u64 _2) { return __lsx_vssrlrn_wu_d(_1, _2); } + // CHECK-LABEL: @vfrstpi_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vfrstpi.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vfrstpi.b(<16 x i8> TMP0, <16 x i8> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vfrstpi_b(v16i8 _1, v16i8 _2) { return __lsx_vfrstpi_b(_1, _2, 1); } + // CHECK-LABEL: @vfrstpi_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vfrstpi.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vfrstpi.h(<8 x i16> TMP0, <8 x i16> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vfrstpi_h(v8i16 _1, v8i16 _2) { return __lsx_vfrstpi_h(_1, _2, 1); } + // CHECK-LABEL: @vfrstp_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vfrstp.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*, <16 x i8> _3:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP3:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vfrstp.b(<16 x i8> TMP0, <16 x i8> TMP1, <16 x i8> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <16 x i8> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v16i8 vfrstp_b(v16i8 _1, v16i8 _2, v16i8 _3) { + return __lsx_vfrstp_b(_1, _2, _3); + } + // CHECK-LABEL: @vfrstp_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vfrstp.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*, <8 x i16> _3:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP3:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vfrstp.h(<8 x i16> TMP0, <8 x i16> TMP1, <8 x i16> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <8 x i16> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v8i16 vfrstp_h(v8i16 _1, v8i16 _2, v8i16 _3) { + return __lsx_vfrstp_h(_1, _2, _3); + } + // CHECK-LABEL: @vshuf4i_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> 
@llvm.loongarch.lsx.vshuf4i.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*, i32 1) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vshuf4i.d(<2 x i64> TMP0, <2 x i64> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vshuf4i_d(v2i64 _1, v2i64 _2) { return __lsx_vshuf4i_d(_1, _2, 1); } + // CHECK-LABEL: @vbsrl_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vbsrl.v(<16 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vbsrl.v(<16 x i8> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <16 x i8> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v16i8 vbsrl_v(v16i8 _1) { return __lsx_vbsrl_v(_1, 1); } + // CHECK-LABEL: @vbsll_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vbsll.v(<16 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vbsll.v(<16 x i8> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <16 x i8> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v16i8 vbsll_v(v16i8 _1) { return __lsx_vbsll_v(_1, 1); } + // CHECK-LABEL: @vextrins_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vextrins.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vextrins.b(<16 x i8> TMP0, <16 x i8> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vextrins_b(v16i8 _1, v16i8 _2) { return __lsx_vextrins_b(_1, _2, 1); } + // CHECK-LABEL: @vextrins_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vextrins.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vextrins.h(<8 x i16> TMP0, <8 x i16> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vextrins_h(v8i16 _1, v8i16 _2) { return __lsx_vextrins_h(_1, _2, 1); } + // CHECK-LABEL: @vextrins_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vextrins.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vextrins.w(<4 x i32> TMP0, <4 x i32> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vextrins_w(v4i32 _1, v4i32 _2) { return __lsx_vextrins_w(_1, _2, 1); } + // CHECK-LABEL: @vextrins_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: 
TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vextrins.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*, i32 1) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vextrins.d(<2 x i64> TMP0, <2 x i64> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vextrins_d(v2i64 _1, v2i64 _2) { return __lsx_vextrins_d(_1, _2, 1); } + // CHECK-LABEL: @vmskltz_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmskltz.b(<16 x i8> _1:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmskltz.b(<16 x i8> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <16 x i8> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v16i8 vmskltz_b(v16i8 _1) { return __lsx_vmskltz_b(_1); } + // CHECK-LABEL: @vmskltz_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmskltz.h(<8 x i16> _1:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmskltz.h(<8 x i16> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <8 x i16> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v8i16 vmskltz_h(v8i16 _1) { return __lsx_vmskltz_h(_1); } + // CHECK-LABEL: @vmskltz_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmskltz.w(<4 x i32> _1:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmskltz.w(<4 x i32> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x i32> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4i32 vmskltz_w(v4i32 _1) { return __lsx_vmskltz_w(_1); } + // CHECK-LABEL: @vmskltz_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmskltz.d(<2 x i64> _1:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmskltz.d(<2 x i64> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vmskltz_d(v2i64 _1) { return __lsx_vmskltz_d(_1); } + // CHECK-LABEL: @vsigncov_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsigncov.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsigncov.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vsigncov_b(v16i8 _1, v16i8 _2) { return __lsx_vsigncov_b(_1, _2); } + // CHECK-LABEL: @vsigncov_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsigncov.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// 
CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsigncov.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vsigncov_h(v8i16 _1, v8i16 _2) { return __lsx_vsigncov_h(_1, _2); } + // CHECK-LABEL: @vsigncov_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsigncov.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsigncov.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vsigncov_w(v4i32 _1, v4i32 _2) { return __lsx_vsigncov_w(_1, _2); } + // CHECK-LABEL: @vsigncov_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsigncov.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsigncov.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vsigncov_d(v2i64 _1, v2i64 _2) { return __lsx_vsigncov_d(_1, _2); } + // CHECK-LABEL: @vfmadd_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfmadd.s(<4 x float> _1:%.*, <4 x float> _2:%.*, <4 x float> _3:%.*) +-// CHECK-NEXT: ret <4 x float> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP3:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfmadd.s(<4 x float> TMP0, <4 x float> TMP1, <4 x float> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <4 x float> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v4f32 vfmadd_s(v4f32 _1, v4f32 _2, v4f32 _3) { + return __lsx_vfmadd_s(_1, _2, _3); + } + // CHECK-LABEL: @vfmadd_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfmadd.d(<2 x double> _1:%.*, <2 x double> _2:%.*, <2 x double> _3:%.*) +-// CHECK-NEXT: ret <2 x double> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP3:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfmadd.d(<2 x double> TMP0, <2 x double> TMP1, <2 x double> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <2 x double> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v2f64 vfmadd_d(v2f64 _1, v2f64 _2, v2f64 _3) { + return __lsx_vfmadd_d(_1, _2, _3); + } + // CHECK-LABEL: @vfmsub_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfmsub.s(<4 x float> _1:%.*, <4 x float> _2:%.*, <4 x float> _3:%.*) +-// CHECK-NEXT: ret <4 x float> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP3:%.* = tail call <4 x float> 
@llvm.loongarch.lsx.vfmsub.s(<4 x float> TMP0, <4 x float> TMP1, <4 x float> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <4 x float> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v4f32 vfmsub_s(v4f32 _1, v4f32 _2, v4f32 _3) { + return __lsx_vfmsub_s(_1, _2, _3); + } + // CHECK-LABEL: @vfmsub_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfmsub.d(<2 x double> _1:%.*, <2 x double> _2:%.*, <2 x double> _3:%.*) +-// CHECK-NEXT: ret <2 x double> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP3:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfmsub.d(<2 x double> TMP0, <2 x double> TMP1, <2 x double> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <2 x double> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v2f64 vfmsub_d(v2f64 _1, v2f64 _2, v2f64 _3) { + return __lsx_vfmsub_d(_1, _2, _3); + } + // CHECK-LABEL: @vfnmadd_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfnmadd.s(<4 x float> _1:%.*, <4 x float> _2:%.*, <4 x float> _3:%.*) +-// CHECK-NEXT: ret <4 x float> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP3:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfnmadd.s(<4 x float> TMP0, <4 x float> TMP1, <4 x float> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <4 x float> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v4f32 vfnmadd_s(v4f32 _1, v4f32 _2, v4f32 _3) { + return __lsx_vfnmadd_s(_1, _2, _3); + } + // CHECK-LABEL: @vfnmadd_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfnmadd.d(<2 x double> _1:%.*, <2 x double> _2:%.*, <2 x double> _3:%.*) +-// CHECK-NEXT: ret <2 x double> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP3:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfnmadd.d(<2 x double> TMP0, <2 x double> TMP1, <2 x double> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <2 x double> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v2f64 vfnmadd_d(v2f64 _1, v2f64 _2, v2f64 _3) { + return __lsx_vfnmadd_d(_1, _2, _3); + } + // CHECK-LABEL: @vfnmsub_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfnmsub.s(<4 x float> _1:%.*, <4 x float> _2:%.*, <4 x float> _3:%.*) +-// CHECK-NEXT: ret <4 x float> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP3:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfnmsub.s(<4 x float> TMP0, <4 x float> TMP1, <4 x float> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <4 x float> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v4f32 vfnmsub_s(v4f32 _1, v4f32 _2, v4f32 _3) { + return __lsx_vfnmsub_s(_1, _2, _3); + } + // CHECK-LABEL: @vfnmsub_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfnmsub.d(<2 x double> _1:%.*, <2 x double> _2:%.*, <2 x double> _3:%.*) +-// 
CHECK-NEXT: ret <2 x double> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP3:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfnmsub.d(<2 x double> TMP0, <2 x double> TMP1, <2 x double> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <2 x double> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v2f64 vfnmsub_d(v2f64 _1, v2f64 _2, v2f64 _3) { + return __lsx_vfnmsub_d(_1, _2, _3); + } + // CHECK-LABEL: @vftintrne_w_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.s(<4 x float> _1:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.s(<4 x float> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x i32> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4i32 vftintrne_w_s(v4f32 _1) { return __lsx_vftintrne_w_s(_1); } + // CHECK-LABEL: @vftintrne_l_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftintrne.l.d(<2 x double> _1:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftintrne.l.d(<2 x double> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vftintrne_l_d(v2f64 _1) { return __lsx_vftintrne_l_d(_1); } + // CHECK-LABEL: @vftintrp_w_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.s(<4 x float> _1:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.s(<4 x float> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x i32> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4i32 vftintrp_w_s(v4f32 _1) { return __lsx_vftintrp_w_s(_1); } + // CHECK-LABEL: @vftintrp_l_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftintrp.l.d(<2 x double> _1:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftintrp.l.d(<2 x double> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vftintrp_l_d(v2f64 _1) { return __lsx_vftintrp_l_d(_1); } + // CHECK-LABEL: @vftintrm_w_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.s(<4 x float> _1:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.s(<4 x float> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x i32> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4i32 vftintrm_w_s(v4f32 _1) { return __lsx_vftintrm_w_s(_1); } + // CHECK-LABEL: @vftintrm_l_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftintrm.l.d(<2 x double> _1:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> 
@llvm.loongarch.lsx.vftintrm.l.d(<2 x double> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vftintrm_l_d(v2f64 _1) { return __lsx_vftintrm_l_d(_1); } + // CHECK-LABEL: @vftint_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vftint.w.d(<2 x double> _1:%.*, <2 x double> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vftint.w.d(<2 x double> TMP0, <2 x double> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vftint_w_d(v2f64 _1, v2f64 _2) { return __lsx_vftint_w_d(_1, _2); } + // CHECK-LABEL: @vffint_s_l( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.l(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x float> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.l(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x float> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4f32 vffint_s_l(v2i64 _1, v2i64 _2) { return __lsx_vffint_s_l(_1, _2); } + // CHECK-LABEL: @vftintrz_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.d(<2 x double> _1:%.*, <2 x double> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.d(<2 x double> TMP0, <2 x double> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vftintrz_w_d(v2f64 _1, v2f64 _2) { return __lsx_vftintrz_w_d(_1, _2); } + // CHECK-LABEL: @vftintrp_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.d(<2 x double> _1:%.*, <2 x double> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.d(<2 x double> TMP0, <2 x double> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vftintrp_w_d(v2f64 _1, v2f64 _2) { return __lsx_vftintrp_w_d(_1, _2); } + // CHECK-LABEL: @vftintrm_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.d(<2 x double> _1:%.*, <2 x double> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.d(<2 x double> TMP0, <2 x double> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vftintrm_w_d(v2f64 _1, v2f64 _2) { return __lsx_vftintrm_w_d(_1, _2); } + // CHECK-LABEL: @vftintrne_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> 
@llvm.loongarch.lsx.vftintrne.w.d(<2 x double> _1:%.*, <2 x double> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.d(<2 x double> TMP0, <2 x double> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vftintrne_w_d(v2f64 _1, v2f64 _2) { return __lsx_vftintrne_w_d(_1, _2); } + // CHECK-LABEL: @vftintl_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftintl.l.s(<4 x float> _1:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftintl.l.s(<4 x float> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vftintl_l_s(v4f32 _1) { return __lsx_vftintl_l_s(_1); } + // CHECK-LABEL: @vftinth_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftinth.l.s(<4 x float> _1:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftinth.l.s(<4 x float> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vftinth_l_s(v4f32 _1) { return __lsx_vftinth_l_s(_1); } + // CHECK-LABEL: @vffinth_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vffinth.d.w(<4 x i32> _1:%.*) +-// CHECK-NEXT: ret <2 x double> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x double> @llvm.loongarch.lsx.vffinth.d.w(<4 x i32> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x double> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2f64 vffinth_d_w(v4i32 _1) { return __lsx_vffinth_d_w(_1); } + // CHECK-LABEL: @vffintl_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vffintl.d.w(<4 x i32> _1:%.*) +-// CHECK-NEXT: ret <2 x double> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x double> @llvm.loongarch.lsx.vffintl.d.w(<4 x i32> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x double> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2f64 vffintl_d_w(v4i32 _1) { return __lsx_vffintl_d_w(_1); } + // CHECK-LABEL: @vftintrzl_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftintrzl.l.s(<4 x float> _1:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftintrzl.l.s(<4 x float> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vftintrzl_l_s(v4f32 _1) { return __lsx_vftintrzl_l_s(_1); } + // CHECK-LABEL: @vftintrzh_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftintrzh.l.s(<4 x float> _1:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftintrzh.l.s(<4 x float> TMP0) ++// CHECK-NEXT: 
TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vftintrzh_l_s(v4f32 _1) { return __lsx_vftintrzh_l_s(_1); } + // CHECK-LABEL: @vftintrpl_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftintrpl.l.s(<4 x float> _1:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftintrpl.l.s(<4 x float> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vftintrpl_l_s(v4f32 _1) { return __lsx_vftintrpl_l_s(_1); } + // CHECK-LABEL: @vftintrph_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftintrph.l.s(<4 x float> _1:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftintrph.l.s(<4 x float> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vftintrph_l_s(v4f32 _1) { return __lsx_vftintrph_l_s(_1); } + // CHECK-LABEL: @vftintrml_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftintrml.l.s(<4 x float> _1:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftintrml.l.s(<4 x float> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vftintrml_l_s(v4f32 _1) { return __lsx_vftintrml_l_s(_1); } + // CHECK-LABEL: @vftintrmh_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftintrmh.l.s(<4 x float> _1:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftintrmh.l.s(<4 x float> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vftintrmh_l_s(v4f32 _1) { return __lsx_vftintrmh_l_s(_1); } + // CHECK-LABEL: @vftintrnel_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftintrnel.l.s(<4 x float> _1:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftintrnel.l.s(<4 x float> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vftintrnel_l_s(v4f32 _1) { return __lsx_vftintrnel_l_s(_1); } + // CHECK-LABEL: @vftintrneh_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftintrneh.l.s(<4 x float> _1:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftintrneh.l.s(<4 x float> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vftintrneh_l_s(v4f32 _1) { return __lsx_vftintrneh_l_s(_1); } + // CHECK-LABEL: @vfrintrne_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfrintrne.s(<4 x float> _1:%.*) +-// CHECK-NEXT: TMP1:%.* = bitcast <4 x float> TMP0 to <4 x i32> 
+-// CHECK-NEXT: ret <4 x i32> TMP1 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfrintrne.s(<4 x float> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x float> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4i32 vfrintrne_s(v4f32 _1) { return __lsx_vfrintrne_s(_1); } + // CHECK-LABEL: @vfrintrne_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfrintrne.d(<2 x double> _1:%.*) +-// CHECK-NEXT: TMP1:%.* = bitcast <2 x double> TMP0 to <2 x i64> +-// CHECK-NEXT: ret <2 x i64> TMP1 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfrintrne.d(<2 x double> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x double> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vfrintrne_d(v2f64 _1) { return __lsx_vfrintrne_d(_1); } + // CHECK-LABEL: @vfrintrz_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfrintrz.s(<4 x float> _1:%.*) +-// CHECK-NEXT: TMP1:%.* = bitcast <4 x float> TMP0 to <4 x i32> +-// CHECK-NEXT: ret <4 x i32> TMP1 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfrintrz.s(<4 x float> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x float> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4i32 vfrintrz_s(v4f32 _1) { return __lsx_vfrintrz_s(_1); } + // CHECK-LABEL: @vfrintrz_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfrintrz.d(<2 x double> _1:%.*) +-// CHECK-NEXT: TMP1:%.* = bitcast <2 x double> TMP0 to <2 x i64> +-// CHECK-NEXT: ret <2 x i64> TMP1 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfrintrz.d(<2 x double> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x double> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vfrintrz_d(v2f64 _1) { return __lsx_vfrintrz_d(_1); } + // CHECK-LABEL: @vfrintrp_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfrintrp.s(<4 x float> _1:%.*) +-// CHECK-NEXT: TMP1:%.* = bitcast <4 x float> TMP0 to <4 x i32> +-// CHECK-NEXT: ret <4 x i32> TMP1 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfrintrp.s(<4 x float> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x float> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4i32 vfrintrp_s(v4f32 _1) { return __lsx_vfrintrp_s(_1); } + // CHECK-LABEL: @vfrintrp_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfrintrp.d(<2 x double> _1:%.*) +-// CHECK-NEXT: TMP1:%.* = bitcast <2 x double> TMP0 to <2 x i64> +-// CHECK-NEXT: ret <2 x i64> TMP1 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfrintrp.d(<2 x double> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x double> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vfrintrp_d(v2f64 _1) { return __lsx_vfrintrp_d(_1); } + // CHECK-LABEL: @vfrintrm_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfrintrm.s(<4 x float> _1:%.*) +-// CHECK-NEXT: TMP1:%.* = bitcast <4 x float> TMP0 to <4 x i32> +-// 
CHECK-NEXT: ret <4 x i32> TMP1 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfrintrm.s(<4 x float> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x float> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4i32 vfrintrm_s(v4f32 _1) { return __lsx_vfrintrm_s(_1); } + // CHECK-LABEL: @vfrintrm_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfrintrm.d(<2 x double> _1:%.*) +-// CHECK-NEXT: TMP1:%.* = bitcast <2 x double> TMP0 to <2 x i64> +-// CHECK-NEXT: ret <2 x i64> TMP1 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfrintrm.d(<2 x double> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x double> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vfrintrm_d(v2f64 _1) { return __lsx_vfrintrm_d(_1); } + // CHECK-LABEL: @vstelm_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.b(<16 x i8> _1:%.*, ptr _2:%.*, i32 1, i32 1) ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.b(<16 x i8> TMP0, ptr _2:%.*, i32 1, i32 1) + // CHECK-NEXT: ret void + // + void vstelm_b(v16i8 _1, void *_2) { return __lsx_vstelm_b(_1, _2, 1, 1); } + // CHECK-LABEL: @vstelm_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.h(<8 x i16> _1:%.*, ptr _2:%.*, i32 2, i32 1) ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.h(<8 x i16> TMP0, ptr _2:%.*, i32 2, i32 1) + // CHECK-NEXT: ret void + // + void vstelm_h(v8i16 _1, void *_2) { return __lsx_vstelm_h(_1, _2, 2, 1); } + // CHECK-LABEL: @vstelm_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.w(<4 x i32> _1:%.*, ptr _2:%.*, i32 4, i32 1) ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.w(<4 x i32> TMP0, ptr _2:%.*, i32 4, i32 1) + // CHECK-NEXT: ret void + // + void vstelm_w(v4i32 _1, void *_2) { return __lsx_vstelm_w(_1, _2, 4, 1); } + // CHECK-LABEL: @vstelm_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.d(<2 x i64> _1:%.*, ptr _2:%.*, i32 8, i32 1) ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.d(<2 x i64> TMP0, ptr _2:%.*, i32 8, i32 1) + // CHECK-NEXT: ret void + // + void vstelm_d(v2i64 _1, void *_2) { return __lsx_vstelm_d(_1, _2, 8, 1); } + // CHECK-LABEL: @vaddwev_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vaddwev_d_w(v4i32 _1, v4i32 _2) { return __lsx_vaddwev_d_w(_1, _2); } + // CHECK-LABEL: @vaddwev_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: 
TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vaddwev_w_h(v8i16 _1, v8i16 _2) { return __lsx_vaddwev_w_h(_1, _2); } + // CHECK-LABEL: @vaddwev_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vaddwev_h_b(v16i8 _1, v16i8 _2) { return __lsx_vaddwev_h_b(_1, _2); } + // CHECK-LABEL: @vaddwod_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vaddwod_d_w(v4i32 _1, v4i32 _2) { return __lsx_vaddwod_d_w(_1, _2); } + // CHECK-LABEL: @vaddwod_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vaddwod_w_h(v8i16 _1, v8i16 _2) { return __lsx_vaddwod_w_h(_1, _2); } + // CHECK-LABEL: @vaddwod_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vaddwod_h_b(v16i8 _1, v16i8 _2) { return __lsx_vaddwod_h_b(_1, _2); } + // CHECK-LABEL: @vaddwev_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vaddwev_d_wu(v4u32 _1, v4u32 _2) { return __lsx_vaddwev_d_wu(_1, _2); } 
+ // CHECK-LABEL: @vaddwev_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vaddwev_w_hu(v8u16 _1, v8u16 _2) { return __lsx_vaddwev_w_hu(_1, _2); } + // CHECK-LABEL: @vaddwev_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vaddwev_h_bu(v16u8 _1, v16u8 _2) { return __lsx_vaddwev_h_bu(_1, _2); } + // CHECK-LABEL: @vaddwod_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vaddwod_d_wu(v4u32 _1, v4u32 _2) { return __lsx_vaddwod_d_wu(_1, _2); } + // CHECK-LABEL: @vaddwod_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vaddwod_w_hu(v8u16 _1, v8u16 _2) { return __lsx_vaddwod_w_hu(_1, _2); } + // CHECK-LABEL: @vaddwod_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vaddwod_h_bu(v16u8 _1, v16u8 _2) { return __lsx_vaddwod_h_bu(_1, _2); } + // CHECK-LABEL: @vaddwev_d_wu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail 
call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vaddwev_d_wu_w(v4u32 _1, v4i32 _2) { + return __lsx_vaddwev_d_wu_w(_1, _2); + } + // CHECK-LABEL: @vaddwev_w_hu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vaddwev_w_hu_h(v8u16 _1, v8i16 _2) { + return __lsx_vaddwev_w_hu_h(_1, _2); + } + // CHECK-LABEL: @vaddwev_h_bu_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vaddwev_h_bu_b(v16u8 _1, v16i8 _2) { + return __lsx_vaddwev_h_bu_b(_1, _2); + } + // CHECK-LABEL: @vaddwod_d_wu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vaddwod_d_wu_w(v4u32 _1, v4i32 _2) { + return __lsx_vaddwod_d_wu_w(_1, _2); + } + // CHECK-LABEL: @vaddwod_w_hu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vaddwod_w_hu_h(v8u16 _1, v8i16 _2) { + return __lsx_vaddwod_w_hu_h(_1, _2); + } + // CHECK-LABEL: @vaddwod_h_bu_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vaddwod_h_bu_b(v16u8 _1, v16i8 _2) { + return __lsx_vaddwod_h_bu_b(_1, _2); + } + // CHECK-LABEL: @vsubwev_d_w( + // CHECK-NEXT: 
entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vsubwev_d_w(v4i32 _1, v4i32 _2) { return __lsx_vsubwev_d_w(_1, _2); } + // CHECK-LABEL: @vsubwev_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vsubwev_w_h(v8i16 _1, v8i16 _2) { return __lsx_vsubwev_w_h(_1, _2); } + // CHECK-LABEL: @vsubwev_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vsubwev_h_b(v16i8 _1, v16i8 _2) { return __lsx_vsubwev_h_b(_1, _2); } + // CHECK-LABEL: @vsubwod_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vsubwod_d_w(v4i32 _1, v4i32 _2) { return __lsx_vsubwod_d_w(_1, _2); } + // CHECK-LABEL: @vsubwod_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vsubwod_w_h(v8i16 _1, v8i16 _2) { return __lsx_vsubwod_w_h(_1, _2); } + // CHECK-LABEL: @vsubwod_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.b(<16 x i8> TMP0, <16 x i8> TMP1) 
++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vsubwod_h_b(v16i8 _1, v16i8 _2) { return __lsx_vsubwod_h_b(_1, _2); } + // CHECK-LABEL: @vsubwev_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.wu(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.wu(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vsubwev_d_wu(v4u32 _1, v4u32 _2) { return __lsx_vsubwev_d_wu(_1, _2); } + // CHECK-LABEL: @vsubwev_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.hu(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.hu(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vsubwev_w_hu(v8u16 _1, v8u16 _2) { return __lsx_vsubwev_w_hu(_1, _2); } + // CHECK-LABEL: @vsubwev_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.bu(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.bu(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vsubwev_h_bu(v16u8 _1, v16u8 _2) { return __lsx_vsubwev_h_bu(_1, _2); } + // CHECK-LABEL: @vsubwod_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.wu(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.wu(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vsubwod_d_wu(v4u32 _1, v4u32 _2) { return __lsx_vsubwod_d_wu(_1, _2); } + // CHECK-LABEL: @vsubwod_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.hu(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.hu(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vsubwod_w_hu(v8u16 _1, v8u16 _2) { return __lsx_vsubwod_w_hu(_1, _2); } + // CHECK-LABEL: @vsubwod_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.bu(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 
++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.bu(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vsubwod_h_bu(v16u8 _1, v16u8 _2) { return __lsx_vsubwod_h_bu(_1, _2); } + // CHECK-LABEL: @vaddwev_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vaddwev_q_d(v2i64 _1, v2i64 _2) { return __lsx_vaddwev_q_d(_1, _2); } + // CHECK-LABEL: @vaddwod_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vaddwod_q_d(v2i64 _1, v2i64 _2) { return __lsx_vaddwod_q_d(_1, _2); } + // CHECK-LABEL: @vaddwev_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vaddwev_q_du(v2u64 _1, v2u64 _2) { return __lsx_vaddwev_q_du(_1, _2); } + // CHECK-LABEL: @vaddwod_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vaddwod_q_du(v2u64 _1, v2u64 _2) { return __lsx_vaddwod_q_du(_1, _2); } + // CHECK-LABEL: @vsubwev_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vsubwev_q_d(v2i64 _1, v2i64 _2) { return 
__lsx_vsubwev_q_d(_1, _2); } + // CHECK-LABEL: @vsubwod_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vsubwod_q_d(v2i64 _1, v2i64 _2) { return __lsx_vsubwod_q_d(_1, _2); } + // CHECK-LABEL: @vsubwev_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.du(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vsubwev_q_du(v2u64 _1, v2u64 _2) { return __lsx_vsubwev_q_du(_1, _2); } + // CHECK-LABEL: @vsubwod_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.du(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vsubwod_q_du(v2u64 _1, v2u64 _2) { return __lsx_vsubwod_q_du(_1, _2); } + // CHECK-LABEL: @vaddwev_q_du_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vaddwev_q_du_d(v2u64 _1, v2i64 _2) { + return __lsx_vaddwev_q_du_d(_1, _2); + } + // CHECK-LABEL: @vaddwod_q_du_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vaddwod_q_du_d(v2u64 _1, v2i64 _2) { + return __lsx_vaddwod_q_du_d(_1, _2); + } + // CHECK-LABEL: @vmulwev_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* 
to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vmulwev_d_w(v4i32 _1, v4i32 _2) { return __lsx_vmulwev_d_w(_1, _2); } + // CHECK-LABEL: @vmulwev_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vmulwev_w_h(v8i16 _1, v8i16 _2) { return __lsx_vmulwev_w_h(_1, _2); } + // CHECK-LABEL: @vmulwev_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vmulwev_h_b(v16i8 _1, v16i8 _2) { return __lsx_vmulwev_h_b(_1, _2); } + // CHECK-LABEL: @vmulwod_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vmulwod_d_w(v4i32 _1, v4i32 _2) { return __lsx_vmulwod_d_w(_1, _2); } + // CHECK-LABEL: @vmulwod_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vmulwod_w_h(v8i16 _1, v8i16 _2) { return __lsx_vmulwod_w_h(_1, _2); } + // CHECK-LABEL: @vmulwod_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vmulwod_h_b(v16i8 _1, v16i8 _2) { return __lsx_vmulwod_h_b(_1, _2); } + // CHECK-LABEL: @vmulwev_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> 
@llvm.loongarch.lsx.vmulwev.d.wu(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vmulwev_d_wu(v4u32 _1, v4u32 _2) { return __lsx_vmulwev_d_wu(_1, _2); } + // CHECK-LABEL: @vmulwev_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vmulwev_w_hu(v8u16 _1, v8u16 _2) { return __lsx_vmulwev_w_hu(_1, _2); } + // CHECK-LABEL: @vmulwev_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vmulwev_h_bu(v16u8 _1, v16u8 _2) { return __lsx_vmulwev_h_bu(_1, _2); } + // CHECK-LABEL: @vmulwod_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vmulwod_d_wu(v4u32 _1, v4u32 _2) { return __lsx_vmulwod_d_wu(_1, _2); } + // CHECK-LABEL: @vmulwod_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vmulwod_w_hu(v8u16 _1, v8u16 _2) { return __lsx_vmulwod_w_hu(_1, _2); } + // CHECK-LABEL: @vmulwod_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = 
bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vmulwod_h_bu(v16u8 _1, v16u8 _2) { return __lsx_vmulwod_h_bu(_1, _2); } + // CHECK-LABEL: @vmulwev_d_wu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vmulwev_d_wu_w(v4u32 _1, v4i32 _2) { + return __lsx_vmulwev_d_wu_w(_1, _2); + } + // CHECK-LABEL: @vmulwev_w_hu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vmulwev_w_hu_h(v8u16 _1, v8i16 _2) { + return __lsx_vmulwev_w_hu_h(_1, _2); + } + // CHECK-LABEL: @vmulwev_h_bu_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vmulwev_h_bu_b(v16u8 _1, v16i8 _2) { + return __lsx_vmulwev_h_bu_b(_1, _2); + } + // CHECK-LABEL: @vmulwod_d_wu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vmulwod_d_wu_w(v4u32 _1, v4i32 _2) { + return __lsx_vmulwod_d_wu_w(_1, _2); + } + // CHECK-LABEL: @vmulwod_w_hu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vmulwod_w_hu_h(v8u16 _1, v8i16 _2) { + return __lsx_vmulwod_w_hu_h(_1, _2); + } + // CHECK-LABEL: @vmulwod_h_bu_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu.b(<16 x i8> _1:%.*, <16 x 
i8> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vmulwod_h_bu_b(v16u8 _1, v16i8 _2) { + return __lsx_vmulwod_h_bu_b(_1, _2); + } + // CHECK-LABEL: @vmulwev_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vmulwev_q_d(v2i64 _1, v2i64 _2) { return __lsx_vmulwev_q_d(_1, _2); } + // CHECK-LABEL: @vmulwod_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vmulwod_q_d(v2i64 _1, v2i64 _2) { return __lsx_vmulwod_q_d(_1, _2); } + // CHECK-LABEL: @vmulwev_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vmulwev_q_du(v2u64 _1, v2u64 _2) { return __lsx_vmulwev_q_du(_1, _2); } + // CHECK-LABEL: @vmulwod_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vmulwod_q_du(v2u64 _1, v2u64 _2) { return __lsx_vmulwod_q_du(_1, _2); } + // CHECK-LABEL: @vmulwev_q_du_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret 
i128 TMP3 + // + v2i64 vmulwev_q_du_d(v2u64 _1, v2i64 _2) { + return __lsx_vmulwev_q_du_d(_1, _2); + } + // CHECK-LABEL: @vmulwod_q_du_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vmulwod_q_du_d(v2u64 _1, v2i64 _2) { + return __lsx_vmulwod_q_du_d(_1, _2); + } + // CHECK-LABEL: @vhaddw_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.q.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.q.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vhaddw_q_d(v2i64 _1, v2i64 _2) { return __lsx_vhaddw_q_d(_1, _2); } + // CHECK-LABEL: @vhaddw_qu_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.qu.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.qu.du(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2u64 vhaddw_qu_du(v2u64 _1, v2u64 _2) { return __lsx_vhaddw_qu_du(_1, _2); } + // CHECK-LABEL: @vhsubw_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.q.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.q.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vhsubw_q_d(v2i64 _1, v2i64 _2) { return __lsx_vhsubw_q_d(_1, _2); } + // CHECK-LABEL: @vhsubw_qu_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.qu.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.qu.du(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2u64 vhsubw_qu_du(v2u64 _1, v2u64 _2) { return __lsx_vhsubw_qu_du(_1, _2); } + // CHECK-LABEL: @vmaddwev_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.w(<2 x i64> _1:%.*, <4 x i32> _2:%.*, <4 x i32> _3:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to 
<2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP3:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.w(<2 x i64> TMP0, <4 x i32> TMP1, <4 x i32> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <2 x i64> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v2i64 vmaddwev_d_w(v2i64 _1, v4i32 _2, v4i32 _3) { + return __lsx_vmaddwev_d_w(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwev_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.h(<4 x i32> _1:%.*, <8 x i16> _2:%.*, <8 x i16> _3:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP3:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.h(<4 x i32> TMP0, <8 x i16> TMP1, <8 x i16> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <4 x i32> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v4i32 vmaddwev_w_h(v4i32 _1, v8i16 _2, v8i16 _3) { + return __lsx_vmaddwev_w_h(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwev_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.b(<8 x i16> _1:%.*, <16 x i8> _2:%.*, <16 x i8> _3:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP3:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.b(<8 x i16> TMP0, <16 x i8> TMP1, <16 x i8> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <8 x i16> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v8i16 vmaddwev_h_b(v8i16 _1, v16i8 _2, v16i8 _3) { + return __lsx_vmaddwev_h_b(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwev_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu(<2 x i64> _1:%.*, <4 x i32> _2:%.*, <4 x i32> _3:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP3:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu(<2 x i64> TMP0, <4 x i32> TMP1, <4 x i32> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <2 x i64> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v2u64 vmaddwev_d_wu(v2u64 _1, v4u32 _2, v4u32 _3) { + return __lsx_vmaddwev_d_wu(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwev_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu(<4 x i32> _1:%.*, <8 x i16> _2:%.*, <8 x i16> _3:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP3:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu(<4 x i32> TMP0, <8 x i16> TMP1, <8 x i16> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <4 x i32> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v4u32 vmaddwev_w_hu(v4u32 _1, v8u16 _2, v8u16 _3) { + return __lsx_vmaddwev_w_hu(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwev_h_bu( + // CHECK-NEXT: entry: 
+-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu(<8 x i16> _1:%.*, <16 x i8> _2:%.*, <16 x i8> _3:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP3:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu(<8 x i16> TMP0, <16 x i8> TMP1, <16 x i8> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <8 x i16> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v8u16 vmaddwev_h_bu(v8u16 _1, v16u8 _2, v16u8 _3) { + return __lsx_vmaddwev_h_bu(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwod_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.w(<2 x i64> _1:%.*, <4 x i32> _2:%.*, <4 x i32> _3:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP3:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.w(<2 x i64> TMP0, <4 x i32> TMP1, <4 x i32> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <2 x i64> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v2i64 vmaddwod_d_w(v2i64 _1, v4i32 _2, v4i32 _3) { + return __lsx_vmaddwod_d_w(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwod_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.h(<4 x i32> _1:%.*, <8 x i16> _2:%.*, <8 x i16> _3:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP3:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.h(<4 x i32> TMP0, <8 x i16> TMP1, <8 x i16> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <4 x i32> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v4i32 vmaddwod_w_h(v4i32 _1, v8i16 _2, v8i16 _3) { + return __lsx_vmaddwod_w_h(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwod_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.b(<8 x i16> _1:%.*, <16 x i8> _2:%.*, <16 x i8> _3:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP3:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.b(<8 x i16> TMP0, <16 x i8> TMP1, <16 x i8> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <8 x i16> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v8i16 vmaddwod_h_b(v8i16 _1, v16i8 _2, v16i8 _3) { + return __lsx_vmaddwod_h_b(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwod_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu(<2 x i64> _1:%.*, <4 x i32> _2:%.*, <4 x i32> _3:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP3:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu(<2 x i64> TMP0, <4 x i32> TMP1, <4 x i32> TMP2) ++// CHECK-NEXT: TMP4:%.* = 
bitcast <2 x i64> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v2u64 vmaddwod_d_wu(v2u64 _1, v4u32 _2, v4u32 _3) { + return __lsx_vmaddwod_d_wu(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwod_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu(<4 x i32> _1:%.*, <8 x i16> _2:%.*, <8 x i16> _3:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP3:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu(<4 x i32> TMP0, <8 x i16> TMP1, <8 x i16> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <4 x i32> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v4u32 vmaddwod_w_hu(v4u32 _1, v8u16 _2, v8u16 _3) { + return __lsx_vmaddwod_w_hu(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwod_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu(<8 x i16> _1:%.*, <16 x i8> _2:%.*, <16 x i8> _3:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP3:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu(<8 x i16> TMP0, <16 x i8> TMP1, <16 x i8> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <8 x i16> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v8u16 vmaddwod_h_bu(v8u16 _1, v16u8 _2, v16u8 _3) { + return __lsx_vmaddwod_h_bu(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwev_d_wu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu.w(<2 x i64> _1:%.*, <4 x i32> _2:%.*, <4 x i32> _3:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP3:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu.w(<2 x i64> TMP0, <4 x i32> TMP1, <4 x i32> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <2 x i64> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v2i64 vmaddwev_d_wu_w(v2i64 _1, v4u32 _2, v4i32 _3) { + return __lsx_vmaddwev_d_wu_w(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwev_w_hu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu.h(<4 x i32> _1:%.*, <8 x i16> _2:%.*, <8 x i16> _3:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP3:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu.h(<4 x i32> TMP0, <8 x i16> TMP1, <8 x i16> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <4 x i32> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v4i32 vmaddwev_w_hu_h(v4i32 _1, v8u16 _2, v8i16 _3) { + return __lsx_vmaddwev_w_hu_h(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwev_h_bu_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu.b(<8 x i16> _1:%.*, <16 x i8> _2:%.*, <16 x i8> _3:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = 
bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP3:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu.b(<8 x i16> TMP0, <16 x i8> TMP1, <16 x i8> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <8 x i16> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v8i16 vmaddwev_h_bu_b(v8i16 _1, v16u8 _2, v16i8 _3) { + return __lsx_vmaddwev_h_bu_b(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwod_d_wu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu.w(<2 x i64> _1:%.*, <4 x i32> _2:%.*, <4 x i32> _3:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP3:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu.w(<2 x i64> TMP0, <4 x i32> TMP1, <4 x i32> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <2 x i64> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v2i64 vmaddwod_d_wu_w(v2i64 _1, v4u32 _2, v4i32 _3) { + return __lsx_vmaddwod_d_wu_w(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwod_w_hu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu.h(<4 x i32> _1:%.*, <8 x i16> _2:%.*, <8 x i16> _3:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP3:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu.h(<4 x i32> TMP0, <8 x i16> TMP1, <8 x i16> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <4 x i32> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v4i32 vmaddwod_w_hu_h(v4i32 _1, v8u16 _2, v8i16 _3) { + return __lsx_vmaddwod_w_hu_h(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwod_h_bu_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu.b(<8 x i16> _1:%.*, <16 x i8> _2:%.*, <16 x i8> _3:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP3:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu.b(<8 x i16> TMP0, <16 x i8> TMP1, <16 x i8> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <8 x i16> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v8i16 vmaddwod_h_bu_b(v8i16 _1, v16u8 _2, v16i8 _3) { + return __lsx_vmaddwod_h_bu_b(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwev_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*, <2 x i64> _3:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP3:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.d(<2 x i64> TMP0, <2 x i64> TMP1, <2 x i64> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <2 x i64> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v2i64 vmaddwev_q_d(v2i64 _1, v2i64 _2, v2i64 _3) { + return __lsx_vmaddwev_q_d(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwod_q_d( + // CHECK-NEXT: 
entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*, <2 x i64> _3:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP3:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.d(<2 x i64> TMP0, <2 x i64> TMP1, <2 x i64> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <2 x i64> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v2i64 vmaddwod_q_d(v2i64 _1, v2i64 _2, v2i64 _3) { + return __lsx_vmaddwod_q_d(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwev_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*, <2 x i64> _3:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP3:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du(<2 x i64> TMP0, <2 x i64> TMP1, <2 x i64> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <2 x i64> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v2u64 vmaddwev_q_du(v2u64 _1, v2u64 _2, v2u64 _3) { + return __lsx_vmaddwev_q_du(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwod_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*, <2 x i64> _3:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP3:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du(<2 x i64> TMP0, <2 x i64> TMP1, <2 x i64> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <2 x i64> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v2u64 vmaddwod_q_du(v2u64 _1, v2u64 _2, v2u64 _3) { + return __lsx_vmaddwod_q_du(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwev_q_du_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*, <2 x i64> _3:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP3:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du.d(<2 x i64> TMP0, <2 x i64> TMP1, <2 x i64> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <2 x i64> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v2i64 vmaddwev_q_du_d(v2i64 _1, v2u64 _2, v2i64 _3) { + return __lsx_vmaddwev_q_du_d(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwod_q_du_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*, <2 x i64> _3:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP3:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du.d(<2 x i64> TMP0, <2 x i64> TMP1, <2 x i64> 
TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <2 x i64> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v2i64 vmaddwod_q_du_d(v2i64 _1, v2u64 _2, v2i64 _3) { + return __lsx_vmaddwod_q_du_d(_1, _2, _3); + } + // CHECK-LABEL: @vrotr_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vrotr.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vrotr.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vrotr_b(v16i8 _1, v16i8 _2) { return __lsx_vrotr_b(_1, _2); } + // CHECK-LABEL: @vrotr_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vrotr.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vrotr.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vrotr_h(v8i16 _1, v8i16 _2) { return __lsx_vrotr_h(_1, _2); } + // CHECK-LABEL: @vrotr_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vrotr.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vrotr.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vrotr_w(v4i32 _1, v4i32 _2) { return __lsx_vrotr_w(_1, _2); } + // CHECK-LABEL: @vrotr_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vrotr.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vrotr.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vrotr_d(v2i64 _1, v2i64 _2) { return __lsx_vrotr_d(_1, _2); } + // CHECK-LABEL: @vadd_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vadd.q(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vadd.q(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vadd_q(v2i64 _1, v2i64 _2) { return __lsx_vadd_q(_1, _2); } + // CHECK-LABEL: @vsub_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsub.q(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 
_2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsub.q(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vsub_q(v2i64 _1, v2i64 _2) { return __lsx_vsub_q(_1, _2); } + // CHECK-LABEL: @vldrepl_b( + // CHECK-NEXT: entry: + // CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vldrepl.b(ptr _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP1:%.* = bitcast <16 x i8> TMP0 to i128 ++// CHECK-NEXT: ret i128 TMP1 + // + v16i8 vldrepl_b(void *_1) { return __lsx_vldrepl_b(_1, 1); } + // CHECK-LABEL: @vldrepl_h( + // CHECK-NEXT: entry: + // CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vldrepl.h(ptr _1:%.*, i32 2) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP1:%.* = bitcast <8 x i16> TMP0 to i128 ++// CHECK-NEXT: ret i128 TMP1 + // + v8i16 vldrepl_h(void *_1) { return __lsx_vldrepl_h(_1, 2); } + // CHECK-LABEL: @vldrepl_w( + // CHECK-NEXT: entry: + // CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vldrepl.w(ptr _1:%.*, i32 4) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP1:%.* = bitcast <4 x i32> TMP0 to i128 ++// CHECK-NEXT: ret i128 TMP1 + // + v4i32 vldrepl_w(void *_1) { return __lsx_vldrepl_w(_1, 4); } + // CHECK-LABEL: @vldrepl_d( + // CHECK-NEXT: entry: + // CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vldrepl.d(ptr _1:%.*, i32 8) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP1:%.* = bitcast <2 x i64> TMP0 to i128 ++// CHECK-NEXT: ret i128 TMP1 + // + v2i64 vldrepl_d(void *_1) { return __lsx_vldrepl_d(_1, 8); } + // CHECK-LABEL: @vmskgez_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmskgez.b(<16 x i8> _1:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmskgez.b(<16 x i8> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <16 x i8> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v16i8 vmskgez_b(v16i8 _1) { return __lsx_vmskgez_b(_1); } + // CHECK-LABEL: @vmsknz_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmsknz.b(<16 x i8> _1:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmsknz.b(<16 x i8> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <16 x i8> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v16i8 vmsknz_b(v16i8 _1) { return __lsx_vmsknz_b(_1); } + // CHECK-LABEL: @vexth_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vexth.h.b(<16 x i8> _1:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vexth.h.b(<16 x i8> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <8 x i16> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v8i16 vexth_h_b(v16i8 _1) { return __lsx_vexth_h_b(_1); } + // CHECK-LABEL: @vexth_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vexth.w.h(<8 x i16> _1:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vexth.w.h(<8 x i16> TMP0) ++// CHECK-NEXT: TMP2:%.* 
= bitcast <4 x i32> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4i32 vexth_w_h(v8i16 _1) { return __lsx_vexth_w_h(_1); } + // CHECK-LABEL: @vexth_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vexth.d.w(<4 x i32> _1:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vexth.d.w(<4 x i32> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vexth_d_w(v4i32 _1) { return __lsx_vexth_d_w(_1); } + // CHECK-LABEL: @vexth_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vexth.q.d(<2 x i64> _1:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vexth.q.d(<2 x i64> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vexth_q_d(v2i64 _1) { return __lsx_vexth_q_d(_1); } + // CHECK-LABEL: @vexth_hu_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vexth.hu.bu(<16 x i8> _1:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vexth.hu.bu(<16 x i8> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <8 x i16> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v8u16 vexth_hu_bu(v16u8 _1) { return __lsx_vexth_hu_bu(_1); } + // CHECK-LABEL: @vexth_wu_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vexth.wu.hu(<8 x i16> _1:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vexth.wu.hu(<8 x i16> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x i32> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4u32 vexth_wu_hu(v8u16 _1) { return __lsx_vexth_wu_hu(_1); } + // CHECK-LABEL: @vexth_du_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vexth.du.wu(<4 x i32> _1:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vexth.du.wu(<4 x i32> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2u64 vexth_du_wu(v4u32 _1) { return __lsx_vexth_du_wu(_1); } + // CHECK-LABEL: @vexth_qu_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vexth.qu.du(<2 x i64> _1:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vexth.qu.du(<2 x i64> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2u64 vexth_qu_du(v2u64 _1) { return __lsx_vexth_qu_du(_1); } + // CHECK-LABEL: @vrotri_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vrotri.b(<16 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vrotri.b(<16 x i8> TMP0, i32 1) ++// 
CHECK-NEXT: TMP2:%.* = bitcast <16 x i8> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v16i8 vrotri_b(v16i8 _1) { return __lsx_vrotri_b(_1, 1); } + // CHECK-LABEL: @vrotri_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vrotri.h(<8 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vrotri.h(<8 x i16> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <8 x i16> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v8i16 vrotri_h(v8i16 _1) { return __lsx_vrotri_h(_1, 1); } + // CHECK-LABEL: @vrotri_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vrotri.w(<4 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vrotri.w(<4 x i32> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x i32> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4i32 vrotri_w(v4i32 _1) { return __lsx_vrotri_w(_1, 1); } + // CHECK-LABEL: @vrotri_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vrotri.d(<2 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vrotri.d(<2 x i64> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vrotri_d(v2i64 _1) { return __lsx_vrotri_d(_1, 1); } + // CHECK-LABEL: @vextl_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vextl.q.d(<2 x i64> _1:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vextl.q.d(<2 x i64> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vextl_q_d(v2i64 _1) { return __lsx_vextl_q_d(_1); } + // CHECK-LABEL: @vsrlni_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h(<16 x i8> _1:%.*, <16 x i8> _2:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h(<16 x i8> TMP0, <16 x i8> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vsrlni_b_h(v16i8 _1, v16i8 _2) { return __lsx_vsrlni_b_h(_1, _2, 1); } + // CHECK-LABEL: @vsrlni_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsrlni.h.w(<8 x i16> _1:%.*, <8 x i16> _2:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsrlni.h.w(<8 x i16> TMP0, <8 x i16> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vsrlni_h_w(v8i16 _1, v8i16 _2) { return __lsx_vsrlni_h_w(_1, _2, 1); } + // CHECK-LABEL: @vsrlni_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: 
TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsrlni.w.d(<4 x i32> _1:%.*, <4 x i32> _2:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsrlni.w.d(<4 x i32> TMP0, <4 x i32> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vsrlni_w_d(v4i32 _1, v4i32 _2) { return __lsx_vsrlni_w_d(_1, _2, 1); } + // CHECK-LABEL: @vsrlni_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsrlni.d.q(<2 x i64> _1:%.*, <2 x i64> _2:%.*, i32 1) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsrlni.d.q(<2 x i64> TMP0, <2 x i64> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vsrlni_d_q(v2i64 _1, v2i64 _2) { return __lsx_vsrlni_d_q(_1, _2, 1); } + // CHECK-LABEL: @vsrlrni_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h(<16 x i8> _1:%.*, <16 x i8> _2:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h(<16 x i8> TMP0, <16 x i8> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vsrlrni_b_h(v16i8 _1, v16i8 _2) { return __lsx_vsrlrni_b_h(_1, _2, 1); } + // CHECK-LABEL: @vsrlrni_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsrlrni.h.w(<8 x i16> _1:%.*, <8 x i16> _2:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsrlrni.h.w(<8 x i16> TMP0, <8 x i16> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vsrlrni_h_w(v8i16 _1, v8i16 _2) { return __lsx_vsrlrni_h_w(_1, _2, 1); } + // CHECK-LABEL: @vsrlrni_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d(<4 x i32> _1:%.*, <4 x i32> _2:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d(<4 x i32> TMP0, <4 x i32> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vsrlrni_w_d(v4i32 _1, v4i32 _2) { return __lsx_vsrlrni_w_d(_1, _2, 1); } + // CHECK-LABEL: @vsrlrni_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsrlrni.d.q(<2 x i64> _1:%.*, <2 x i64> _2:%.*, i32 1) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> 
@llvm.loongarch.lsx.vsrlrni.d.q(<2 x i64> TMP0, <2 x i64> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vsrlrni_d_q(v2i64 _1, v2i64 _2) { return __lsx_vsrlrni_d_q(_1, _2, 1); } + // CHECK-LABEL: @vssrlni_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h(<16 x i8> _1:%.*, <16 x i8> _2:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h(<16 x i8> TMP0, <16 x i8> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vssrlni_b_h(v16i8 _1, v16i8 _2) { return __lsx_vssrlni_b_h(_1, _2, 1); } + // CHECK-LABEL: @vssrlni_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssrlni.h.w(<8 x i16> _1:%.*, <8 x i16> _2:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssrlni.h.w(<8 x i16> TMP0, <8 x i16> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vssrlni_h_w(v8i16 _1, v8i16 _2) { return __lsx_vssrlni_h_w(_1, _2, 1); } + // CHECK-LABEL: @vssrlni_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssrlni.w.d(<4 x i32> _1:%.*, <4 x i32> _2:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssrlni.w.d(<4 x i32> TMP0, <4 x i32> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vssrlni_w_d(v4i32 _1, v4i32 _2) { return __lsx_vssrlni_w_d(_1, _2, 1); } + // CHECK-LABEL: @vssrlni_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vssrlni.d.q(<2 x i64> _1:%.*, <2 x i64> _2:%.*, i32 1) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vssrlni.d.q(<2 x i64> TMP0, <2 x i64> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vssrlni_d_q(v2i64 _1, v2i64 _2) { return __lsx_vssrlni_d_q(_1, _2, 1); } + // CHECK-LABEL: @vssrlni_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssrlni.bu.h(<16 x i8> _1:%.*, <16 x i8> _2:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssrlni.bu.h(<16 x i8> TMP0, <16 x i8> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16u8 vssrlni_bu_h(v16u8 _1, v16i8 _2) { return __lsx_vssrlni_bu_h(_1, _2, 1); } + // CHECK-LABEL: @vssrlni_hu_w( + // CHECK-NEXT: entry: +-// 
CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssrlni.hu.w(<8 x i16> _1:%.*, <8 x i16> _2:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssrlni.hu.w(<8 x i16> TMP0, <8 x i16> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8u16 vssrlni_hu_w(v8u16 _1, v8i16 _2) { return __lsx_vssrlni_hu_w(_1, _2, 1); } + // CHECK-LABEL: @vssrlni_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssrlni.wu.d(<4 x i32> _1:%.*, <4 x i32> _2:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssrlni.wu.d(<4 x i32> TMP0, <4 x i32> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4u32 vssrlni_wu_d(v4u32 _1, v4i32 _2) { return __lsx_vssrlni_wu_d(_1, _2, 1); } + // CHECK-LABEL: @vssrlni_du_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vssrlni.du.q(<2 x i64> _1:%.*, <2 x i64> _2:%.*, i32 1) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vssrlni.du.q(<2 x i64> TMP0, <2 x i64> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2u64 vssrlni_du_q(v2u64 _1, v2i64 _2) { return __lsx_vssrlni_du_q(_1, _2, 1); } + // CHECK-LABEL: @vssrlrni_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h(<16 x i8> _1:%.*, <16 x i8> _2:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h(<16 x i8> TMP0, <16 x i8> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vssrlrni_b_h(v16i8 _1, v16i8 _2) { return __lsx_vssrlrni_b_h(_1, _2, 1); } + // CHECK-LABEL: @vssrlrni_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrni.h.w(<8 x i16> _1:%.*, <8 x i16> _2:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrni.h.w(<8 x i16> TMP0, <8 x i16> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vssrlrni_h_w(v8i16 _1, v8i16 _2) { return __lsx_vssrlrni_h_w(_1, _2, 1); } + // CHECK-LABEL: @vssrlrni_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrni.w.d(<4 x i32> _1:%.*, <4 x i32> _2:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// 
CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrni.w.d(<4 x i32> TMP0, <4 x i32> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vssrlrni_w_d(v4i32 _1, v4i32 _2) { return __lsx_vssrlrni_w_d(_1, _2, 1); } + // CHECK-LABEL: @vssrlrni_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vssrlrni.d.q(<2 x i64> _1:%.*, <2 x i64> _2:%.*, i32 1) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vssrlrni.d.q(<2 x i64> TMP0, <2 x i64> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vssrlrni_d_q(v2i64 _1, v2i64 _2) { return __lsx_vssrlrni_d_q(_1, _2, 1); } + // CHECK-LABEL: @vssrlrni_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrni.bu.h(<16 x i8> _1:%.*, <16 x i8> _2:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrni.bu.h(<16 x i8> TMP0, <16 x i8> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16u8 vssrlrni_bu_h(v16u8 _1, v16i8 _2) { + return __lsx_vssrlrni_bu_h(_1, _2, 1); + } + // CHECK-LABEL: @vssrlrni_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrni.hu.w(<8 x i16> _1:%.*, <8 x i16> _2:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrni.hu.w(<8 x i16> TMP0, <8 x i16> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8u16 vssrlrni_hu_w(v8u16 _1, v8i16 _2) { + return __lsx_vssrlrni_hu_w(_1, _2, 1); + } + // CHECK-LABEL: @vssrlrni_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrni.wu.d(<4 x i32> _1:%.*, <4 x i32> _2:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrni.wu.d(<4 x i32> TMP0, <4 x i32> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4u32 vssrlrni_wu_d(v4u32 _1, v4i32 _2) { + return __lsx_vssrlrni_wu_d(_1, _2, 1); + } + // CHECK-LABEL: @vssrlrni_du_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vssrlrni.du.q(<2 x i64> _1:%.*, <2 x i64> _2:%.*, i32 1) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vssrlrni.du.q(<2 x i64> TMP0, <2 x i64> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2u64 vssrlrni_du_q(v2u64 _1, v2i64 _2) { + return 
__lsx_vssrlrni_du_q(_1, _2, 1); + } + // CHECK-LABEL: @vsrani_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsrani.b.h(<16 x i8> _1:%.*, <16 x i8> _2:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsrani.b.h(<16 x i8> TMP0, <16 x i8> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vsrani_b_h(v16i8 _1, v16i8 _2) { return __lsx_vsrani_b_h(_1, _2, 1); } + // CHECK-LABEL: @vsrani_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsrani.h.w(<8 x i16> _1:%.*, <8 x i16> _2:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsrani.h.w(<8 x i16> TMP0, <8 x i16> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vsrani_h_w(v8i16 _1, v8i16 _2) { return __lsx_vsrani_h_w(_1, _2, 1); } + // CHECK-LABEL: @vsrani_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsrani.w.d(<4 x i32> _1:%.*, <4 x i32> _2:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsrani.w.d(<4 x i32> TMP0, <4 x i32> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vsrani_w_d(v4i32 _1, v4i32 _2) { return __lsx_vsrani_w_d(_1, _2, 1); } + // CHECK-LABEL: @vsrani_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsrani.d.q(<2 x i64> _1:%.*, <2 x i64> _2:%.*, i32 1) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsrani.d.q(<2 x i64> TMP0, <2 x i64> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vsrani_d_q(v2i64 _1, v2i64 _2) { return __lsx_vsrani_d_q(_1, _2, 1); } + // CHECK-LABEL: @vsrarni_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h(<16 x i8> _1:%.*, <16 x i8> _2:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h(<16 x i8> TMP0, <16 x i8> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vsrarni_b_h(v16i8 _1, v16i8 _2) { return __lsx_vsrarni_b_h(_1, _2, 1); } + // CHECK-LABEL: @vsrarni_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsrarni.h.w(<8 x i16> _1:%.*, <8 x i16> _2:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* 
= bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsrarni.h.w(<8 x i16> TMP0, <8 x i16> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vsrarni_h_w(v8i16 _1, v8i16 _2) { return __lsx_vsrarni_h_w(_1, _2, 1); } + // CHECK-LABEL: @vsrarni_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsrarni.w.d(<4 x i32> _1:%.*, <4 x i32> _2:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsrarni.w.d(<4 x i32> TMP0, <4 x i32> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vsrarni_w_d(v4i32 _1, v4i32 _2) { return __lsx_vsrarni_w_d(_1, _2, 1); } + // CHECK-LABEL: @vsrarni_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsrarni.d.q(<2 x i64> _1:%.*, <2 x i64> _2:%.*, i32 1) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsrarni.d.q(<2 x i64> TMP0, <2 x i64> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vsrarni_d_q(v2i64 _1, v2i64 _2) { return __lsx_vsrarni_d_q(_1, _2, 1); } + // CHECK-LABEL: @vssrani_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssrani.b.h(<16 x i8> _1:%.*, <16 x i8> _2:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssrani.b.h(<16 x i8> TMP0, <16 x i8> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vssrani_b_h(v16i8 _1, v16i8 _2) { return __lsx_vssrani_b_h(_1, _2, 1); } + // CHECK-LABEL: @vssrani_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssrani.h.w(<8 x i16> _1:%.*, <8 x i16> _2:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssrani.h.w(<8 x i16> TMP0, <8 x i16> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vssrani_h_w(v8i16 _1, v8i16 _2) { return __lsx_vssrani_h_w(_1, _2, 1); } + // CHECK-LABEL: @vssrani_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssrani.w.d(<4 x i32> _1:%.*, <4 x i32> _2:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssrani.w.d(<4 x i32> TMP0, <4 x i32> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vssrani_w_d(v4i32 _1, v4i32 _2) { return 
__lsx_vssrani_w_d(_1, _2, 1); } + // CHECK-LABEL: @vssrani_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vssrani.d.q(<2 x i64> _1:%.*, <2 x i64> _2:%.*, i32 1) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vssrani.d.q(<2 x i64> TMP0, <2 x i64> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vssrani_d_q(v2i64 _1, v2i64 _2) { return __lsx_vssrani_d_q(_1, _2, 1); } + // CHECK-LABEL: @vssrani_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssrani.bu.h(<16 x i8> _1:%.*, <16 x i8> _2:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssrani.bu.h(<16 x i8> TMP0, <16 x i8> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16u8 vssrani_bu_h(v16u8 _1, v16i8 _2) { return __lsx_vssrani_bu_h(_1, _2, 1); } + // CHECK-LABEL: @vssrani_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssrani.hu.w(<8 x i16> _1:%.*, <8 x i16> _2:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssrani.hu.w(<8 x i16> TMP0, <8 x i16> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8u16 vssrani_hu_w(v8u16 _1, v8i16 _2) { return __lsx_vssrani_hu_w(_1, _2, 1); } + // CHECK-LABEL: @vssrani_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssrani.wu.d(<4 x i32> _1:%.*, <4 x i32> _2:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssrani.wu.d(<4 x i32> TMP0, <4 x i32> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4u32 vssrani_wu_d(v4u32 _1, v4i32 _2) { return __lsx_vssrani_wu_d(_1, _2, 1); } + // CHECK-LABEL: @vssrani_du_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vssrani.du.q(<2 x i64> _1:%.*, <2 x i64> _2:%.*, i32 1) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vssrani.du.q(<2 x i64> TMP0, <2 x i64> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2u64 vssrani_du_q(v2u64 _1, v2i64 _2) { return __lsx_vssrani_du_q(_1, _2, 1); } + // CHECK-LABEL: @vssrarni_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h(<16 x i8> _1:%.*, <16 x i8> _2:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* 
to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h(<16 x i8> TMP0, <16 x i8> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vssrarni_b_h(v16i8 _1, v16i8 _2) { return __lsx_vssrarni_b_h(_1, _2, 1); } + // CHECK-LABEL: @vssrarni_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssrarni.h.w(<8 x i16> _1:%.*, <8 x i16> _2:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssrarni.h.w(<8 x i16> TMP0, <8 x i16> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vssrarni_h_w(v8i16 _1, v8i16 _2) { return __lsx_vssrarni_h_w(_1, _2, 1); } + // CHECK-LABEL: @vssrarni_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssrarni.w.d(<4 x i32> _1:%.*, <4 x i32> _2:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssrarni.w.d(<4 x i32> TMP0, <4 x i32> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vssrarni_w_d(v4i32 _1, v4i32 _2) { return __lsx_vssrarni_w_d(_1, _2, 1); } + // CHECK-LABEL: @vssrarni_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vssrarni.d.q(<2 x i64> _1:%.*, <2 x i64> _2:%.*, i32 1) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vssrarni.d.q(<2 x i64> TMP0, <2 x i64> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vssrarni_d_q(v2i64 _1, v2i64 _2) { return __lsx_vssrarni_d_q(_1, _2, 1); } + // CHECK-LABEL: @vssrarni_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssrarni.bu.h(<16 x i8> _1:%.*, <16 x i8> _2:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssrarni.bu.h(<16 x i8> TMP0, <16 x i8> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16u8 vssrarni_bu_h(v16u8 _1, v16i8 _2) { + return __lsx_vssrarni_bu_h(_1, _2, 1); + } + // CHECK-LABEL: @vssrarni_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssrarni.hu.w(<8 x i16> _1:%.*, <8 x i16> _2:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssrarni.hu.w(<8 x i16> TMP0, <8 x i16> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 
TMP3 + // + v8u16 vssrarni_hu_w(v8u16 _1, v8i16 _2) { + return __lsx_vssrarni_hu_w(_1, _2, 1); + } + // CHECK-LABEL: @vssrarni_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssrarni.wu.d(<4 x i32> _1:%.*, <4 x i32> _2:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssrarni.wu.d(<4 x i32> TMP0, <4 x i32> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4u32 vssrarni_wu_d(v4u32 _1, v4i32 _2) { + return __lsx_vssrarni_wu_d(_1, _2, 1); + } + // CHECK-LABEL: @vssrarni_du_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vssrarni.du.q(<2 x i64> _1:%.*, <2 x i64> _2:%.*, i32 1) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vssrarni.du.q(<2 x i64> TMP0, <2 x i64> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2u64 vssrarni_du_q(v2u64 _1, v2i64 _2) { + return __lsx_vssrarni_du_q(_1, _2, 1); + } + // CHECK-LABEL: @vpermi_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vpermi.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vpermi.w(<4 x i32> TMP0, <4 x i32> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vpermi_w(v4i32 _1, v4i32 _2) { return __lsx_vpermi_w(_1, _2, 1); } + // CHECK-LABEL: @vld( + // CHECK-NEXT: entry: + // CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vld(ptr _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP1:%.* = bitcast <16 x i8> TMP0 to i128 ++// CHECK-NEXT: ret i128 TMP1 + // + v16i8 vld(void *_1) { return __lsx_vld(_1, 1); } + // CHECK-LABEL: @vst( + // CHECK-NEXT: entry: +-// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vst(<16 x i8> _1:%.*, ptr _2:%.*, i32 1) ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vst(<16 x i8> TMP0, ptr _2:%.*, i32 1) + // CHECK-NEXT: ret void + // + void vst(v16i8 _1, void *_2) { return __lsx_vst(_1, _2, 1); } + // CHECK-LABEL: @vssrlrn_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrn.b.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrn.b.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vssrlrn_b_h(v8i16 _1, v8i16 _2) { return __lsx_vssrlrn_b_h(_1, _2); } + // CHECK-LABEL: @vssrlrn_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrn.h.w(<4 x i32> _1:%.*, <4 x 
i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrn.h.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vssrlrn_h_w(v4i32 _1, v4i32 _2) { return __lsx_vssrlrn_h_w(_1, _2); } + // CHECK-LABEL: @vssrlrn_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrn.w.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrn.w.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vssrlrn_w_d(v2i64 _1, v2i64 _2) { return __lsx_vssrlrn_w_d(_1, _2); } + // CHECK-LABEL: @vssrln_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssrln.b.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssrln.b.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vssrln_b_h(v8i16 _1, v8i16 _2) { return __lsx_vssrln_b_h(_1, _2); } + // CHECK-LABEL: @vssrln_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssrln.h.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssrln.h.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vssrln_h_w(v4i32 _1, v4i32 _2) { return __lsx_vssrln_h_w(_1, _2); } + // CHECK-LABEL: @vssrln_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssrln.w.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssrln.w.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vssrln_w_d(v2i64 _1, v2i64 _2) { return __lsx_vssrln_w_d(_1, _2); } + // CHECK-LABEL: @vorn_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vorn.v(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vorn.v(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vorn_v(v16i8 _1, v16i8 _2) { return 
__lsx_vorn_v(_1, _2); } + // CHECK-LABEL: @vldi( + // CHECK-NEXT: entry: + // CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vldi(i32 1) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP1:%.* = bitcast <2 x i64> TMP0 to i128 ++// CHECK-NEXT: ret i128 TMP1 + // + v2i64 vldi() { return __lsx_vldi(1); } + // CHECK-LABEL: @vshuf_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vshuf.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*, <16 x i8> _3:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP3:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vshuf.b(<16 x i8> TMP0, <16 x i8> TMP1, <16 x i8> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <16 x i8> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v16i8 vshuf_b(v16i8 _1, v16i8 _2, v16i8 _3) { + return __lsx_vshuf_b(_1, _2, _3); +@@ -4086,366 +5844,516 @@ v16i8 vshuf_b(v16i8 _1, v16i8 _2, v16i8 _3) { + // CHECK-LABEL: @vldx( + // CHECK-NEXT: entry: + // CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vldx(ptr _1:%.*, i64 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP1:%.* = bitcast <16 x i8> TMP0 to i128 ++// CHECK-NEXT: ret i128 TMP1 + // + v16i8 vldx(void *_1) { return __lsx_vldx(_1, 1); } + // CHECK-LABEL: @vstx( + // CHECK-NEXT: entry: +-// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstx(<16 x i8> _1:%.*, ptr _2:%.*, i64 1) ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstx(<16 x i8> TMP0, ptr _2:%.*, i64 1) + // CHECK-NEXT: ret void + // + void vstx(v16i8 _1, void *_2) { return __lsx_vstx(_1, _2, 1); } + // CHECK-LABEL: @vextl_qu_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vextl.qu.du(<2 x i64> _1:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vextl.qu.du(<2 x i64> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2u64 vextl_qu_du(v2u64 _1) { return __lsx_vextl_qu_du(_1); } + // CHECK-LABEL: @bnz_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lsx.bnz.b(<16 x i8> _1:%.*) +-// CHECK-NEXT: ret i32 TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call i32 @llvm.loongarch.lsx.bnz.b(<16 x i8> TMP0) ++// CHECK-NEXT: ret i32 TMP1 + // + int bnz_b(v16u8 _1) { return __lsx_bnz_b(_1); } + // CHECK-LABEL: @bnz_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lsx.bnz.d(<2 x i64> _1:%.*) +-// CHECK-NEXT: ret i32 TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = tail call i32 @llvm.loongarch.lsx.bnz.d(<2 x i64> TMP0) ++// CHECK-NEXT: ret i32 TMP1 + // + int bnz_d(v2u64 _1) { return __lsx_bnz_d(_1); } + // CHECK-LABEL: @bnz_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lsx.bnz.h(<8 x i16> _1:%.*) +-// CHECK-NEXT: ret i32 TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = tail call i32 @llvm.loongarch.lsx.bnz.h(<8 x i16> TMP0) ++// CHECK-NEXT: ret i32 TMP1 + // + int bnz_h(v8u16 _1) { return 
__lsx_bnz_h(_1); } + // CHECK-LABEL: @bnz_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lsx.bnz.v(<16 x i8> _1:%.*) +-// CHECK-NEXT: ret i32 TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call i32 @llvm.loongarch.lsx.bnz.v(<16 x i8> TMP0) ++// CHECK-NEXT: ret i32 TMP1 + // + int bnz_v(v16u8 _1) { return __lsx_bnz_v(_1); } + // CHECK-LABEL: @bnz_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lsx.bnz.w(<4 x i32> _1:%.*) +-// CHECK-NEXT: ret i32 TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = tail call i32 @llvm.loongarch.lsx.bnz.w(<4 x i32> TMP0) ++// CHECK-NEXT: ret i32 TMP1 + // + int bnz_w(v4u32 _1) { return __lsx_bnz_w(_1); } + // CHECK-LABEL: @bz_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lsx.bz.b(<16 x i8> _1:%.*) +-// CHECK-NEXT: ret i32 TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call i32 @llvm.loongarch.lsx.bz.b(<16 x i8> TMP0) ++// CHECK-NEXT: ret i32 TMP1 + // + int bz_b(v16u8 _1) { return __lsx_bz_b(_1); } + // CHECK-LABEL: @bz_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lsx.bz.d(<2 x i64> _1:%.*) +-// CHECK-NEXT: ret i32 TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = tail call i32 @llvm.loongarch.lsx.bz.d(<2 x i64> TMP0) ++// CHECK-NEXT: ret i32 TMP1 + // + int bz_d(v2u64 _1) { return __lsx_bz_d(_1); } + // CHECK-LABEL: @bz_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lsx.bz.h(<8 x i16> _1:%.*) +-// CHECK-NEXT: ret i32 TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = tail call i32 @llvm.loongarch.lsx.bz.h(<8 x i16> TMP0) ++// CHECK-NEXT: ret i32 TMP1 + // + int bz_h(v8u16 _1) { return __lsx_bz_h(_1); } + // CHECK-LABEL: @bz_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lsx.bz.v(<16 x i8> _1:%.*) +-// CHECK-NEXT: ret i32 TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call i32 @llvm.loongarch.lsx.bz.v(<16 x i8> TMP0) ++// CHECK-NEXT: ret i32 TMP1 + // + int bz_v(v16u8 _1) { return __lsx_bz_v(_1); } + // CHECK-LABEL: @bz_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lsx.bz.w(<4 x i32> _1:%.*) +-// CHECK-NEXT: ret i32 TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = tail call i32 @llvm.loongarch.lsx.bz.w(<4 x i32> TMP0) ++// CHECK-NEXT: ret i32 TMP1 + // + int bz_w(v4u32 _1) { return __lsx_bz_w(_1); } + // CHECK-LABEL: @vfcmp_caf_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.caf.d(<2 x double> _1:%.*, <2 x double> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.caf.d(<2 x double> TMP0, <2 x double> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vfcmp_caf_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_caf_d(_1, _2); } + // CHECK-LABEL: @vfcmp_caf_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail 
call <4 x i32> @llvm.loongarch.lsx.vfcmp.caf.s(<4 x float> _1:%.*, <4 x float> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.caf.s(<4 x float> TMP0, <4 x float> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vfcmp_caf_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_caf_s(_1, _2); } + // CHECK-LABEL: @vfcmp_ceq_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.ceq.d(<2 x double> _1:%.*, <2 x double> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.ceq.d(<2 x double> TMP0, <2 x double> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vfcmp_ceq_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_ceq_d(_1, _2); } + // CHECK-LABEL: @vfcmp_ceq_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.ceq.s(<4 x float> _1:%.*, <4 x float> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.ceq.s(<4 x float> TMP0, <4 x float> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vfcmp_ceq_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_ceq_s(_1, _2); } + // CHECK-LABEL: @vfcmp_cle_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cle.d(<2 x double> _1:%.*, <2 x double> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cle.d(<2 x double> TMP0, <2 x double> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vfcmp_cle_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cle_d(_1, _2); } + // CHECK-LABEL: @vfcmp_cle_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cle.s(<4 x float> _1:%.*, <4 x float> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cle.s(<4 x float> TMP0, <4 x float> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vfcmp_cle_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cle_s(_1, _2); } + // CHECK-LABEL: @vfcmp_clt_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.clt.d(<2 x double> _1:%.*, <2 x double> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> 
@llvm.loongarch.lsx.vfcmp.clt.d(<2 x double> TMP0, <2 x double> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vfcmp_clt_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_clt_d(_1, _2); } + // CHECK-LABEL: @vfcmp_clt_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.clt.s(<4 x float> _1:%.*, <4 x float> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.clt.s(<4 x float> TMP0, <4 x float> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vfcmp_clt_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_clt_s(_1, _2); } + // CHECK-LABEL: @vfcmp_cne_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cne.d(<2 x double> _1:%.*, <2 x double> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cne.d(<2 x double> TMP0, <2 x double> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vfcmp_cne_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cne_d(_1, _2); } + // CHECK-LABEL: @vfcmp_cne_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cne.s(<4 x float> _1:%.*, <4 x float> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cne.s(<4 x float> TMP0, <4 x float> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vfcmp_cne_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cne_s(_1, _2); } + // CHECK-LABEL: @vfcmp_cor_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cor.d(<2 x double> _1:%.*, <2 x double> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cor.d(<2 x double> TMP0, <2 x double> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vfcmp_cor_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cor_d(_1, _2); } + // CHECK-LABEL: @vfcmp_cor_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cor.s(<4 x float> _1:%.*, <4 x float> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cor.s(<4 x float> TMP0, <4 x float> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vfcmp_cor_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cor_s(_1, _2); } + // CHECK-LABEL: @vfcmp_cueq_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call 
<2 x i64> @llvm.loongarch.lsx.vfcmp.cueq.d(<2 x double> _1:%.*, <2 x double> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cueq.d(<2 x double> TMP0, <2 x double> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vfcmp_cueq_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cueq_d(_1, _2); } + // CHECK-LABEL: @vfcmp_cueq_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cueq.s(<4 x float> _1:%.*, <4 x float> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cueq.s(<4 x float> TMP0, <4 x float> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vfcmp_cueq_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cueq_s(_1, _2); } + // CHECK-LABEL: @vfcmp_cule_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cule.d(<2 x double> _1:%.*, <2 x double> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cule.d(<2 x double> TMP0, <2 x double> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vfcmp_cule_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cule_d(_1, _2); } + // CHECK-LABEL: @vfcmp_cule_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cule.s(<4 x float> _1:%.*, <4 x float> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cule.s(<4 x float> TMP0, <4 x float> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vfcmp_cule_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cule_s(_1, _2); } + // CHECK-LABEL: @vfcmp_cult_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cult.d(<2 x double> _1:%.*, <2 x double> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cult.d(<2 x double> TMP0, <2 x double> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vfcmp_cult_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cult_d(_1, _2); } + // CHECK-LABEL: @vfcmp_cult_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cult.s(<4 x float> _1:%.*, <4 x float> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> 
@llvm.loongarch.lsx.vfcmp.cult.s(<4 x float> TMP0, <4 x float> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vfcmp_cult_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cult_s(_1, _2); } + // CHECK-LABEL: @vfcmp_cun_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cun.d(<2 x double> _1:%.*, <2 x double> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cun.d(<2 x double> TMP0, <2 x double> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vfcmp_cun_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cun_d(_1, _2); } + // CHECK-LABEL: @vfcmp_cune_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cune.d(<2 x double> _1:%.*, <2 x double> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cune.d(<2 x double> TMP0, <2 x double> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vfcmp_cune_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_cune_d(_1, _2); } + // CHECK-LABEL: @vfcmp_cune_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cune.s(<4 x float> _1:%.*, <4 x float> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cune.s(<4 x float> TMP0, <4 x float> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vfcmp_cune_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cune_s(_1, _2); } + // CHECK-LABEL: @vfcmp_cun_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cun.s(<4 x float> _1:%.*, <4 x float> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cun.s(<4 x float> TMP0, <4 x float> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vfcmp_cun_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_cun_s(_1, _2); } + // CHECK-LABEL: @vfcmp_saf_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.saf.d(<2 x double> _1:%.*, <2 x double> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.saf.d(<2 x double> TMP0, <2 x double> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vfcmp_saf_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_saf_d(_1, _2); } + // CHECK-LABEL: @vfcmp_saf_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: 
TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.saf.s(<4 x float> _1:%.*, <4 x float> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.saf.s(<4 x float> TMP0, <4 x float> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vfcmp_saf_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_saf_s(_1, _2); } + // CHECK-LABEL: @vfcmp_seq_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.seq.d(<2 x double> _1:%.*, <2 x double> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.seq.d(<2 x double> TMP0, <2 x double> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vfcmp_seq_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_seq_d(_1, _2); } + // CHECK-LABEL: @vfcmp_seq_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.seq.s(<4 x float> _1:%.*, <4 x float> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.seq.s(<4 x float> TMP0, <4 x float> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vfcmp_seq_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_seq_s(_1, _2); } + // CHECK-LABEL: @vfcmp_sle_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sle.d(<2 x double> _1:%.*, <2 x double> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sle.d(<2 x double> TMP0, <2 x double> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vfcmp_sle_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sle_d(_1, _2); } + // CHECK-LABEL: @vfcmp_sle_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sle.s(<4 x float> _1:%.*, <4 x float> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sle.s(<4 x float> TMP0, <4 x float> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vfcmp_sle_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sle_s(_1, _2); } + // CHECK-LABEL: @vfcmp_slt_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.slt.d(<2 x double> _1:%.*, <2 x double> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> 
@llvm.loongarch.lsx.vfcmp.slt.d(<2 x double> TMP0, <2 x double> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vfcmp_slt_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_slt_d(_1, _2); } + // CHECK-LABEL: @vfcmp_slt_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.slt.s(<4 x float> _1:%.*, <4 x float> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.slt.s(<4 x float> TMP0, <4 x float> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vfcmp_slt_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_slt_s(_1, _2); } + // CHECK-LABEL: @vfcmp_sne_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sne.d(<2 x double> _1:%.*, <2 x double> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sne.d(<2 x double> TMP0, <2 x double> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vfcmp_sne_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sne_d(_1, _2); } + // CHECK-LABEL: @vfcmp_sne_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sne.s(<4 x float> _1:%.*, <4 x float> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sne.s(<4 x float> TMP0, <4 x float> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vfcmp_sne_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sne_s(_1, _2); } + // CHECK-LABEL: @vfcmp_sor_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sor.d(<2 x double> _1:%.*, <2 x double> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sor.d(<2 x double> TMP0, <2 x double> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vfcmp_sor_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sor_d(_1, _2); } + // CHECK-LABEL: @vfcmp_sor_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sor.s(<4 x float> _1:%.*, <4 x float> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sor.s(<4 x float> TMP0, <4 x float> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vfcmp_sor_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sor_s(_1, _2); } + // CHECK-LABEL: @vfcmp_sueq_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call 
<2 x i64> @llvm.loongarch.lsx.vfcmp.sueq.d(<2 x double> _1:%.*, <2 x double> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sueq.d(<2 x double> TMP0, <2 x double> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vfcmp_sueq_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sueq_d(_1, _2); } + // CHECK-LABEL: @vfcmp_sueq_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sueq.s(<4 x float> _1:%.*, <4 x float> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sueq.s(<4 x float> TMP0, <4 x float> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vfcmp_sueq_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sueq_s(_1, _2); } + // CHECK-LABEL: @vfcmp_sule_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sule.d(<2 x double> _1:%.*, <2 x double> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sule.d(<2 x double> TMP0, <2 x double> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vfcmp_sule_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sule_d(_1, _2); } + // CHECK-LABEL: @vfcmp_sule_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sule.s(<4 x float> _1:%.*, <4 x float> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sule.s(<4 x float> TMP0, <4 x float> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vfcmp_sule_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sule_s(_1, _2); } + // CHECK-LABEL: @vfcmp_sult_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sult.d(<2 x double> _1:%.*, <2 x double> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sult.d(<2 x double> TMP0, <2 x double> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vfcmp_sult_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sult_d(_1, _2); } + // CHECK-LABEL: @vfcmp_sult_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sult.s(<4 x float> _1:%.*, <4 x float> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> 
@llvm.loongarch.lsx.vfcmp.sult.s(<4 x float> TMP0, <4 x float> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vfcmp_sult_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sult_s(_1, _2); } + // CHECK-LABEL: @vfcmp_sun_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sun.d(<2 x double> _1:%.*, <2 x double> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sun.d(<2 x double> TMP0, <2 x double> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vfcmp_sun_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sun_d(_1, _2); } + // CHECK-LABEL: @vfcmp_sune_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sune.d(<2 x double> _1:%.*, <2 x double> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sune.d(<2 x double> TMP0, <2 x double> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vfcmp_sune_d(v2f64 _1, v2f64 _2) { return __lsx_vfcmp_sune_d(_1, _2); } + // CHECK-LABEL: @vfcmp_sune_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sune.s(<4 x float> _1:%.*, <4 x float> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sune.s(<4 x float> TMP0, <4 x float> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vfcmp_sune_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sune_s(_1, _2); } + // CHECK-LABEL: @vfcmp_sun_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sun.s(<4 x float> _1:%.*, <4 x float> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sun.s(<4 x float> TMP0, <4 x float> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vfcmp_sun_s(v4f32 _1, v4f32 _2) { return __lsx_vfcmp_sun_s(_1, _2); } + // CHECK-LABEL: @vrepli_b( + // CHECK-NEXT: entry: + // CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vrepli.b(i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP1:%.* = bitcast <16 x i8> TMP0 to i128 ++// CHECK-NEXT: ret i128 TMP1 + // + v16i8 vrepli_b() { return __lsx_vrepli_b(1); } + // CHECK-LABEL: @vrepli_d( + // CHECK-NEXT: entry: + // CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vrepli.d(i32 1) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP1:%.* = bitcast <2 x i64> TMP0 to i128 ++// CHECK-NEXT: ret i128 TMP1 + // + v2i64 vrepli_d() { return __lsx_vrepli_d(1); } + // CHECK-LABEL: @vrepli_h( + // CHECK-NEXT: entry: + // CHECK-NEXT: TMP0:%.* = tail call 
<8 x i16> @llvm.loongarch.lsx.vrepli.h(i32 1)
+-// CHECK-NEXT: ret <8 x i16> TMP0
++// CHECK-NEXT: TMP1:%.* = bitcast <8 x i16> TMP0 to i128
++// CHECK-NEXT: ret i128 TMP1
+ //
+ v8i16 vrepli_h() { return __lsx_vrepli_h(1); }
+ // CHECK-LABEL: @vrepli_w(
+ // CHECK-NEXT: entry:
+ // CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vrepli.w(i32 1)
+-// CHECK-NEXT: ret <4 x i32> TMP0
++// CHECK-NEXT: TMP1:%.* = bitcast <4 x i32> TMP0 to i128
++// CHECK-NEXT: ret i128 TMP1
+ //
+ v4i32 vrepli_w() { return __lsx_vrepli_w(1); }
+diff --git a/clang/test/CodeGen/LoongArch/lsx/builtin.c b/clang/test/CodeGen/LoongArch/lsx/builtin.c
+index ef5a390e1838..05a3d13a7fb9 100644
+--- a/clang/test/CodeGen/LoongArch/lsx/builtin.c
++++ b/clang/test/CodeGen/LoongArch/lsx/builtin.c
+@@ -29,3319 +29,4547 @@ typedef double __m128d __attribute__ ((__vector_size__ (16), __may_alias__));
+
+ // CHECK-LABEL: @vsll_b(
+ // CHECK-NEXT: entry:
+-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsll.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*)
+-// CHECK-NEXT: ret <16 x i8> TMP0
++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8>
++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8>
++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsll.b(<16 x i8> TMP0, <16 x i8> TMP1)
++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128
++// CHECK-NEXT: ret i128 TMP3
+ //
+ v16i8 vsll_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vsll_b(_1, _2); }
+ // CHECK-LABEL: @vsll_h(
+ // CHECK-NEXT: entry:
+-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsll.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*)
+-// CHECK-NEXT: ret <8 x i16> TMP0
++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16>
++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16>
++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsll.h(<8 x i16> TMP0, <8 x i16> TMP1)
++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128
++// CHECK-NEXT: ret i128 TMP3
+ //
+ v8i16 vsll_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vsll_h(_1, _2); }
+ // CHECK-LABEL: @vsll_w(
+ // CHECK-NEXT: entry:
+-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsll.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*)
+-// CHECK-NEXT: ret <4 x i32> TMP0
++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32>
++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32>
++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsll.w(<4 x i32> TMP0, <4 x i32> TMP1)
++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128
++// CHECK-NEXT: ret i128 TMP3
+ //
+ v4i32 vsll_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vsll_w(_1, _2); }
+ // CHECK-LABEL: @vsll_d(
+ // CHECK-NEXT: entry:
+-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsll.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*)
+-// CHECK-NEXT: ret <2 x i64> TMP0
++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64>
++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64>
++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsll.d(<2 x i64> TMP0, <2 x i64> TMP1)
++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128
++// CHECK-NEXT: ret i128 TMP3
+ //
+ v2i64 vsll_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vsll_d(_1, _2); }
+ // CHECK-LABEL: @vslli_b(
+ // CHECK-NEXT: entry:
+-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vslli.b(<16 x i8> _1:%.*, i32 1)
+-// CHECK-NEXT: ret <16 x i8> TMP0
++// CHECK-NEXT: TMP0:%.*
= bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vslli.b(<16 x i8> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <16 x i8> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v16i8 vslli_b(v16i8 _1) { return __builtin_lsx_vslli_b(_1, 1); } + // CHECK-LABEL: @vslli_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vslli.h(<8 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vslli.h(<8 x i16> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <8 x i16> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v8i16 vslli_h(v8i16 _1) { return __builtin_lsx_vslli_h(_1, 1); } + // CHECK-LABEL: @vslli_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vslli.w(<4 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vslli.w(<4 x i32> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x i32> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4i32 vslli_w(v4i32 _1) { return __builtin_lsx_vslli_w(_1, 1); } + // CHECK-LABEL: @vslli_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vslli.d(<2 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vslli.d(<2 x i64> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vslli_d(v2i64 _1) { return __builtin_lsx_vslli_d(_1, 1); } + // CHECK-LABEL: @vsra_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsra.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsra.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vsra_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vsra_b(_1, _2); } + // CHECK-LABEL: @vsra_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsra.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsra.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vsra_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vsra_h(_1, _2); } + // CHECK-LABEL: @vsra_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsra.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsra.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = 
bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vsra_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vsra_w(_1, _2); } + // CHECK-LABEL: @vsra_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsra.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsra.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vsra_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vsra_d(_1, _2); } + // CHECK-LABEL: @vsrai_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsrai.b(<16 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsrai.b(<16 x i8> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <16 x i8> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v16i8 vsrai_b(v16i8 _1) { return __builtin_lsx_vsrai_b(_1, 1); } + // CHECK-LABEL: @vsrai_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsrai.h(<8 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsrai.h(<8 x i16> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <8 x i16> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v8i16 vsrai_h(v8i16 _1) { return __builtin_lsx_vsrai_h(_1, 1); } + // CHECK-LABEL: @vsrai_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsrai.w(<4 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsrai.w(<4 x i32> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x i32> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4i32 vsrai_w(v4i32 _1) { return __builtin_lsx_vsrai_w(_1, 1); } + // CHECK-LABEL: @vsrai_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsrai.d(<2 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsrai.d(<2 x i64> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vsrai_d(v2i64 _1) { return __builtin_lsx_vsrai_d(_1, 1); } + // CHECK-LABEL: @vsrar_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsrar.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsrar.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vsrar_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vsrar_b(_1, _2); + } + // CHECK-LABEL: @vsrar_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> 
@llvm.loongarch.lsx.vsrar.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsrar.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vsrar_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vsrar_h(_1, _2); + } + // CHECK-LABEL: @vsrar_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsrar.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsrar.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vsrar_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vsrar_w(_1, _2); + } + // CHECK-LABEL: @vsrar_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsrar.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsrar.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vsrar_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vsrar_d(_1, _2); + } + // CHECK-LABEL: @vsrari_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsrari.b(<16 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsrari.b(<16 x i8> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <16 x i8> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v16i8 vsrari_b(v16i8 _1) { return __builtin_lsx_vsrari_b(_1, 1); } + // CHECK-LABEL: @vsrari_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsrari.h(<8 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsrari.h(<8 x i16> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <8 x i16> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v8i16 vsrari_h(v8i16 _1) { return __builtin_lsx_vsrari_h(_1, 1); } + // CHECK-LABEL: @vsrari_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsrari.w(<4 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsrari.w(<4 x i32> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x i32> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4i32 vsrari_w(v4i32 _1) { return __builtin_lsx_vsrari_w(_1, 1); } + // CHECK-LABEL: @vsrari_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsrari.d(<2 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = 
bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsrari.d(<2 x i64> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vsrari_d(v2i64 _1) { return __builtin_lsx_vsrari_d(_1, 1); } + // CHECK-LABEL: @vsrl_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsrl.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsrl.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vsrl_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vsrl_b(_1, _2); } + // CHECK-LABEL: @vsrl_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsrl.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsrl.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vsrl_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vsrl_h(_1, _2); } + // CHECK-LABEL: @vsrl_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsrl.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsrl.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vsrl_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vsrl_w(_1, _2); } + // CHECK-LABEL: @vsrl_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsrl.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsrl.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vsrl_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vsrl_d(_1, _2); } + // CHECK-LABEL: @vsrli_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsrli.b(<16 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsrli.b(<16 x i8> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <16 x i8> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v16i8 vsrli_b(v16i8 _1) { return __builtin_lsx_vsrli_b(_1, 1); } + // CHECK-LABEL: @vsrli_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsrli.h(<8 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = 
tail call <8 x i16> @llvm.loongarch.lsx.vsrli.h(<8 x i16> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <8 x i16> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v8i16 vsrli_h(v8i16 _1) { return __builtin_lsx_vsrli_h(_1, 1); } + // CHECK-LABEL: @vsrli_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsrli.w(<4 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsrli.w(<4 x i32> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x i32> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4i32 vsrli_w(v4i32 _1) { return __builtin_lsx_vsrli_w(_1, 1); } + // CHECK-LABEL: @vsrli_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsrli.d(<2 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsrli.d(<2 x i64> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vsrli_d(v2i64 _1) { return __builtin_lsx_vsrli_d(_1, 1); } + // CHECK-LABEL: @vsrlr_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsrlr.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsrlr.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vsrlr_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vsrlr_b(_1, _2); + } + // CHECK-LABEL: @vsrlr_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsrlr.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsrlr.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vsrlr_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vsrlr_h(_1, _2); + } + // CHECK-LABEL: @vsrlr_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsrlr.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsrlr.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vsrlr_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vsrlr_w(_1, _2); + } + // CHECK-LABEL: @vsrlr_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsrlr.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsrlr.d(<2 x i64> 
TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vsrlr_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vsrlr_d(_1, _2); + } + // CHECK-LABEL: @vsrlri_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsrlri.b(<16 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsrlri.b(<16 x i8> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <16 x i8> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v16i8 vsrlri_b(v16i8 _1) { return __builtin_lsx_vsrlri_b(_1, 1); } + // CHECK-LABEL: @vsrlri_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsrlri.h(<8 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsrlri.h(<8 x i16> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <8 x i16> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v8i16 vsrlri_h(v8i16 _1) { return __builtin_lsx_vsrlri_h(_1, 1); } + // CHECK-LABEL: @vsrlri_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsrlri.w(<4 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsrlri.w(<4 x i32> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x i32> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4i32 vsrlri_w(v4i32 _1) { return __builtin_lsx_vsrlri_w(_1, 1); } + // CHECK-LABEL: @vsrlri_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsrlri.d(<2 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsrlri.d(<2 x i64> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vsrlri_d(v2i64 _1) { return __builtin_lsx_vsrlri_d(_1, 1); } + // CHECK-LABEL: @vbitclr_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vbitclr.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vbitclr.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16u8 vbitclr_b(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vbitclr_b(_1, _2); + } + // CHECK-LABEL: @vbitclr_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vbitclr.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vbitclr.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8u16 vbitclr_h(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vbitclr_h(_1, _2); + } + 
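// Illustrative sketch, not part of the patch: every regenerated hunk in this test file
// verifies the same pattern. Under the backported LoongArch vector support, a 128-bit LSX
// vector parameter or return value is passed as i128, so the expected IR bitcasts the
// coerced i128 arguments to the vector type, calls the LSX intrinsic, and bitcasts the
// result back to i128 before returning. The typedef name, the function name add_bytes,
// and the build flags mentioned below are assumptions for illustration, not taken from
// the patch. Something like `clang --target=loongarch64 -mlsx -O2 -S -emit-llvm` on this
// file should show the same bitcast / tail call / bitcast shape the CHECK lines expect.

typedef signed char v16i8_sketch __attribute__((vector_size(16), aligned(16)));

v16i8_sketch add_bytes(v16i8_sketch a, v16i8_sketch b) {
  // Expected IR (simplified): a and b arrive as i128, are bitcast to <16 x i8>, the
  // <16 x i8> result of @llvm.loongarch.lsx.vadd.b is bitcast back to i128 and returned.
  return __builtin_lsx_vadd_b(a, b);
}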
// CHECK-LABEL: @vbitclr_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vbitclr.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vbitclr.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4u32 vbitclr_w(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vbitclr_w(_1, _2); + } + // CHECK-LABEL: @vbitclr_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vbitclr.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vbitclr.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2u64 vbitclr_d(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vbitclr_d(_1, _2); + } + // CHECK-LABEL: @vbitclri_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vbitclri.b(<16 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vbitclri.b(<16 x i8> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <16 x i8> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v16u8 vbitclri_b(v16u8 _1) { return __builtin_lsx_vbitclri_b(_1, 1); } + // CHECK-LABEL: @vbitclri_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vbitclri.h(<8 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vbitclri.h(<8 x i16> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <8 x i16> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v8u16 vbitclri_h(v8u16 _1) { return __builtin_lsx_vbitclri_h(_1, 1); } + // CHECK-LABEL: @vbitclri_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vbitclri.w(<4 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vbitclri.w(<4 x i32> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x i32> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4u32 vbitclri_w(v4u32 _1) { return __builtin_lsx_vbitclri_w(_1, 1); } + // CHECK-LABEL: @vbitclri_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vbitclri.d(<2 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vbitclri.d(<2 x i64> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2u64 vbitclri_d(v2u64 _1) { return __builtin_lsx_vbitclri_d(_1, 1); } + // CHECK-LABEL: @vbitset_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vbitset.b(<16 x i8> _1:%.*, <16 x i8> 
_2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vbitset.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16u8 vbitset_b(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vbitset_b(_1, _2); + } + // CHECK-LABEL: @vbitset_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vbitset.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vbitset.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8u16 vbitset_h(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vbitset_h(_1, _2); + } + // CHECK-LABEL: @vbitset_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vbitset.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vbitset.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4u32 vbitset_w(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vbitset_w(_1, _2); + } + // CHECK-LABEL: @vbitset_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vbitset.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vbitset.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2u64 vbitset_d(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vbitset_d(_1, _2); + } + // CHECK-LABEL: @vbitseti_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vbitseti.b(<16 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vbitseti.b(<16 x i8> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <16 x i8> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v16u8 vbitseti_b(v16u8 _1) { return __builtin_lsx_vbitseti_b(_1, 1); } + // CHECK-LABEL: @vbitseti_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vbitseti.h(<8 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vbitseti.h(<8 x i16> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <8 x i16> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v8u16 vbitseti_h(v8u16 _1) { return __builtin_lsx_vbitseti_h(_1, 1); } + // CHECK-LABEL: @vbitseti_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> 
@llvm.loongarch.lsx.vbitseti.w(<4 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vbitseti.w(<4 x i32> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x i32> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4u32 vbitseti_w(v4u32 _1) { return __builtin_lsx_vbitseti_w(_1, 1); } + // CHECK-LABEL: @vbitseti_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vbitseti.d(<2 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vbitseti.d(<2 x i64> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2u64 vbitseti_d(v2u64 _1) { return __builtin_lsx_vbitseti_d(_1, 1); } + // CHECK-LABEL: @vbitrev_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vbitrev.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vbitrev.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16u8 vbitrev_b(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vbitrev_b(_1, _2); + } + // CHECK-LABEL: @vbitrev_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vbitrev.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vbitrev.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8u16 vbitrev_h(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vbitrev_h(_1, _2); + } + // CHECK-LABEL: @vbitrev_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vbitrev.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vbitrev.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4u32 vbitrev_w(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vbitrev_w(_1, _2); + } + // CHECK-LABEL: @vbitrev_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vbitrev.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vbitrev.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2u64 vbitrev_d(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vbitrev_d(_1, _2); + } + // CHECK-LABEL: @vbitrevi_b( + // CHECK-NEXT: entry: +-// 
CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vbitrevi.b(<16 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vbitrevi.b(<16 x i8> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <16 x i8> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v16u8 vbitrevi_b(v16u8 _1) { return __builtin_lsx_vbitrevi_b(_1, 1); } + // CHECK-LABEL: @vbitrevi_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vbitrevi.h(<8 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vbitrevi.h(<8 x i16> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <8 x i16> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v8u16 vbitrevi_h(v8u16 _1) { return __builtin_lsx_vbitrevi_h(_1, 1); } + // CHECK-LABEL: @vbitrevi_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vbitrevi.w(<4 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vbitrevi.w(<4 x i32> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x i32> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4u32 vbitrevi_w(v4u32 _1) { return __builtin_lsx_vbitrevi_w(_1, 1); } + // CHECK-LABEL: @vbitrevi_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vbitrevi.d(<2 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vbitrevi.d(<2 x i64> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2u64 vbitrevi_d(v2u64 _1) { return __builtin_lsx_vbitrevi_d(_1, 1); } + // CHECK-LABEL: @vadd_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vadd.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vadd.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vadd_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vadd_b(_1, _2); } + // CHECK-LABEL: @vadd_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vadd.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vadd.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vadd_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vadd_h(_1, _2); } + // CHECK-LABEL: @vadd_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vadd.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 
x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vadd.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vadd_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vadd_w(_1, _2); } + // CHECK-LABEL: @vadd_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vadd.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vadd.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vadd_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vadd_d(_1, _2); } + // CHECK-LABEL: @vaddi_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vaddi.bu(<16 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vaddi.bu(<16 x i8> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <16 x i8> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v16i8 vaddi_bu(v16i8 _1) { return __builtin_lsx_vaddi_bu(_1, 1); } + // CHECK-LABEL: @vaddi_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vaddi.hu(<8 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vaddi.hu(<8 x i16> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <8 x i16> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v8i16 vaddi_hu(v8i16 _1) { return __builtin_lsx_vaddi_hu(_1, 1); } + // CHECK-LABEL: @vaddi_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vaddi.wu(<4 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vaddi.wu(<4 x i32> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x i32> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4i32 vaddi_wu(v4i32 _1) { return __builtin_lsx_vaddi_wu(_1, 1); } + // CHECK-LABEL: @vaddi_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vaddi.du(<2 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vaddi.du(<2 x i64> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vaddi_du(v2i64 _1) { return __builtin_lsx_vaddi_du(_1, 1); } + // CHECK-LABEL: @vsub_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsub.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsub.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to 
i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vsub_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vsub_b(_1, _2); } + // CHECK-LABEL: @vsub_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsub.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsub.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vsub_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vsub_h(_1, _2); } + // CHECK-LABEL: @vsub_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsub.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsub.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vsub_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vsub_w(_1, _2); } + // CHECK-LABEL: @vsub_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsub.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsub.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vsub_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vsub_d(_1, _2); } + // CHECK-LABEL: @vsubi_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsubi.bu(<16 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsubi.bu(<16 x i8> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <16 x i8> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v16i8 vsubi_bu(v16i8 _1) { return __builtin_lsx_vsubi_bu(_1, 1); } + // CHECK-LABEL: @vsubi_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsubi.hu(<8 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsubi.hu(<8 x i16> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <8 x i16> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v8i16 vsubi_hu(v8i16 _1) { return __builtin_lsx_vsubi_hu(_1, 1); } + // CHECK-LABEL: @vsubi_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsubi.wu(<4 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsubi.wu(<4 x i32> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x i32> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4i32 vsubi_wu(v4i32 _1) { return __builtin_lsx_vsubi_wu(_1, 1); } + // CHECK-LABEL: @vsubi_du( + 
// CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsubi.du(<2 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsubi.du(<2 x i64> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vsubi_du(v2i64 _1) { return __builtin_lsx_vsubi_du(_1, 1); } + // CHECK-LABEL: @vmax_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmax.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmax.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vmax_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vmax_b(_1, _2); } + // CHECK-LABEL: @vmax_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmax.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmax.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vmax_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vmax_h(_1, _2); } + // CHECK-LABEL: @vmax_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmax.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmax.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vmax_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vmax_w(_1, _2); } + // CHECK-LABEL: @vmax_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmax.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmax.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vmax_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vmax_d(_1, _2); } + // CHECK-LABEL: @vmaxi_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmaxi.b(<16 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmaxi.b(<16 x i8> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <16 x i8> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v16i8 vmaxi_b(v16i8 _1) { return __builtin_lsx_vmaxi_b(_1, 1); } + // CHECK-LABEL: @vmaxi_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x 
i16> @llvm.loongarch.lsx.vmaxi.h(<8 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmaxi.h(<8 x i16> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <8 x i16> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v8i16 vmaxi_h(v8i16 _1) { return __builtin_lsx_vmaxi_h(_1, 1); } + // CHECK-LABEL: @vmaxi_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmaxi.w(<4 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmaxi.w(<4 x i32> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x i32> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4i32 vmaxi_w(v4i32 _1) { return __builtin_lsx_vmaxi_w(_1, 1); } + // CHECK-LABEL: @vmaxi_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmaxi.d(<2 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmaxi.d(<2 x i64> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vmaxi_d(v2i64 _1) { return __builtin_lsx_vmaxi_d(_1, 1); } + // CHECK-LABEL: @vmax_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmax.bu(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmax.bu(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16u8 vmax_bu(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vmax_bu(_1, _2); + } + // CHECK-LABEL: @vmax_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmax.hu(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmax.hu(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8u16 vmax_hu(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vmax_hu(_1, _2); + } + // CHECK-LABEL: @vmax_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmax.wu(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmax.wu(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4u32 vmax_wu(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vmax_wu(_1, _2); + } + // CHECK-LABEL: @vmax_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmax.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = 
bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmax.du(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2u64 vmax_du(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vmax_du(_1, _2); + } + // CHECK-LABEL: @vmaxi_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmaxi.bu(<16 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmaxi.bu(<16 x i8> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <16 x i8> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v16u8 vmaxi_bu(v16u8 _1) { return __builtin_lsx_vmaxi_bu(_1, 1); } + // CHECK-LABEL: @vmaxi_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmaxi.hu(<8 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmaxi.hu(<8 x i16> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <8 x i16> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v8u16 vmaxi_hu(v8u16 _1) { return __builtin_lsx_vmaxi_hu(_1, 1); } + // CHECK-LABEL: @vmaxi_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmaxi.wu(<4 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmaxi.wu(<4 x i32> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x i32> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4u32 vmaxi_wu(v4u32 _1) { return __builtin_lsx_vmaxi_wu(_1, 1); } + // CHECK-LABEL: @vmaxi_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmaxi.du(<2 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmaxi.du(<2 x i64> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2u64 vmaxi_du(v2u64 _1) { return __builtin_lsx_vmaxi_du(_1, 1); } + // CHECK-LABEL: @vmin_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmin.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmin.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vmin_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vmin_b(_1, _2); } + // CHECK-LABEL: @vmin_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmin.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmin.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: 
TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vmin_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vmin_h(_1, _2); } + // CHECK-LABEL: @vmin_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmin.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmin.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vmin_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vmin_w(_1, _2); } + // CHECK-LABEL: @vmin_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmin.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmin.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vmin_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vmin_d(_1, _2); } + // CHECK-LABEL: @vmini_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmini.b(<16 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmini.b(<16 x i8> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <16 x i8> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v16i8 vmini_b(v16i8 _1) { return __builtin_lsx_vmini_b(_1, 1); } + // CHECK-LABEL: @vmini_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmini.h(<8 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmini.h(<8 x i16> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <8 x i16> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v8i16 vmini_h(v8i16 _1) { return __builtin_lsx_vmini_h(_1, 1); } + // CHECK-LABEL: @vmini_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmini.w(<4 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmini.w(<4 x i32> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x i32> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4i32 vmini_w(v4i32 _1) { return __builtin_lsx_vmini_w(_1, 1); } + // CHECK-LABEL: @vmini_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmini.d(<2 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmini.d(<2 x i64> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vmini_d(v2i64 _1) { return __builtin_lsx_vmini_d(_1, 1); } + // CHECK-LABEL: @vmin_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> 
@llvm.loongarch.lsx.vmin.bu(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmin.bu(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16u8 vmin_bu(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vmin_bu(_1, _2); + } + // CHECK-LABEL: @vmin_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmin.hu(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmin.hu(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8u16 vmin_hu(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vmin_hu(_1, _2); + } + // CHECK-LABEL: @vmin_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmin.wu(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmin.wu(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4u32 vmin_wu(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vmin_wu(_1, _2); + } + // CHECK-LABEL: @vmin_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmin.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmin.du(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2u64 vmin_du(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vmin_du(_1, _2); + } + // CHECK-LABEL: @vmini_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmini.bu(<16 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmini.bu(<16 x i8> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <16 x i8> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v16u8 vmini_bu(v16u8 _1) { return __builtin_lsx_vmini_bu(_1, 1); } + // CHECK-LABEL: @vmini_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmini.hu(<8 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmini.hu(<8 x i16> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <8 x i16> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v8u16 vmini_hu(v8u16 _1) { return __builtin_lsx_vmini_hu(_1, 1); } + // CHECK-LABEL: @vmini_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> 
@llvm.loongarch.lsx.vmini.wu(<4 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmini.wu(<4 x i32> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x i32> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4u32 vmini_wu(v4u32 _1) { return __builtin_lsx_vmini_wu(_1, 1); } + // CHECK-LABEL: @vmini_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmini.du(<2 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmini.du(<2 x i64> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2u64 vmini_du(v2u64 _1) { return __builtin_lsx_vmini_du(_1, 1); } + // CHECK-LABEL: @vseq_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vseq.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vseq.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vseq_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vseq_b(_1, _2); } + // CHECK-LABEL: @vseq_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vseq.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vseq.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vseq_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vseq_h(_1, _2); } + // CHECK-LABEL: @vseq_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vseq.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vseq.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vseq_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vseq_w(_1, _2); } + // CHECK-LABEL: @vseq_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vseq.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vseq.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vseq_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vseq_d(_1, _2); } + // CHECK-LABEL: @vseqi_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vseqi.b(<16 x i8> _1:%.*, i32 1) +-// 
CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vseqi.b(<16 x i8> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <16 x i8> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v16i8 vseqi_b(v16i8 _1) { return __builtin_lsx_vseqi_b(_1, 1); } + // CHECK-LABEL: @vseqi_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vseqi.h(<8 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vseqi.h(<8 x i16> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <8 x i16> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v8i16 vseqi_h(v8i16 _1) { return __builtin_lsx_vseqi_h(_1, 1); } + // CHECK-LABEL: @vseqi_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vseqi.w(<4 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vseqi.w(<4 x i32> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x i32> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4i32 vseqi_w(v4i32 _1) { return __builtin_lsx_vseqi_w(_1, 1); } + // CHECK-LABEL: @vseqi_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vseqi.d(<2 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vseqi.d(<2 x i64> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vseqi_d(v2i64 _1) { return __builtin_lsx_vseqi_d(_1, 1); } + // CHECK-LABEL: @vslti_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vslti.b(<16 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vslti.b(<16 x i8> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <16 x i8> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v16i8 vslti_b(v16i8 _1) { return __builtin_lsx_vslti_b(_1, 1); } + // CHECK-LABEL: @vslt_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vslt.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vslt.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vslt_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vslt_b(_1, _2); } + // CHECK-LABEL: @vslt_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vslt.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vslt.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// 
CHECK-NEXT: ret i128 TMP3 + // + v8i16 vslt_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vslt_h(_1, _2); } + // CHECK-LABEL: @vslt_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vslt.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vslt.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vslt_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vslt_w(_1, _2); } + // CHECK-LABEL: @vslt_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vslt.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vslt.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vslt_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vslt_d(_1, _2); } + // CHECK-LABEL: @vslti_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vslti.h(<8 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vslti.h(<8 x i16> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <8 x i16> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v8i16 vslti_h(v8i16 _1) { return __builtin_lsx_vslti_h(_1, 1); } + // CHECK-LABEL: @vslti_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vslti.w(<4 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vslti.w(<4 x i32> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x i32> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4i32 vslti_w(v4i32 _1) { return __builtin_lsx_vslti_w(_1, 1); } + // CHECK-LABEL: @vslti_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vslti.d(<2 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vslti.d(<2 x i64> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vslti_d(v2i64 _1) { return __builtin_lsx_vslti_d(_1, 1); } + // CHECK-LABEL: @vslt_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vslt.bu(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vslt.bu(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vslt_bu(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vslt_bu(_1, _2); + } + // CHECK-LABEL: @vslt_hu( + // CHECK-NEXT: 
entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vslt.hu(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vslt.hu(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vslt_hu(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vslt_hu(_1, _2); + } + // CHECK-LABEL: @vslt_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vslt.wu(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vslt.wu(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vslt_wu(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vslt_wu(_1, _2); + } + // CHECK-LABEL: @vslt_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vslt.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vslt.du(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vslt_du(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vslt_du(_1, _2); + } + // CHECK-LABEL: @vslti_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vslti.bu(<16 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vslti.bu(<16 x i8> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <16 x i8> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v16i8 vslti_bu(v16u8 _1) { return __builtin_lsx_vslti_bu(_1, 1); } + // CHECK-LABEL: @vslti_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vslti.hu(<8 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vslti.hu(<8 x i16> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <8 x i16> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v8i16 vslti_hu(v8u16 _1) { return __builtin_lsx_vslti_hu(_1, 1); } + // CHECK-LABEL: @vslti_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vslti.wu(<4 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vslti.wu(<4 x i32> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x i32> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4i32 vslti_wu(v4u32 _1) { return __builtin_lsx_vslti_wu(_1, 1); } + // CHECK-LABEL: @vslti_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vslti.du(<2 x i64> _1:%.*, i32 1) +-// 
CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vslti.du(<2 x i64> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vslti_du(v2u64 _1) { return __builtin_lsx_vslti_du(_1, 1); } + // CHECK-LABEL: @vsle_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsle.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsle.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vsle_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vsle_b(_1, _2); } + // CHECK-LABEL: @vsle_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsle.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsle.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vsle_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vsle_h(_1, _2); } + // CHECK-LABEL: @vsle_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsle.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsle.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vsle_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vsle_w(_1, _2); } + // CHECK-LABEL: @vsle_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsle.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsle.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vsle_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vsle_d(_1, _2); } + // CHECK-LABEL: @vslei_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vslei.b(<16 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vslei.b(<16 x i8> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <16 x i8> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v16i8 vslei_b(v16i8 _1) { return __builtin_lsx_vslei_b(_1, 1); } + // CHECK-LABEL: @vslei_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vslei.h(<8 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast 
i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vslei.h(<8 x i16> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <8 x i16> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v8i16 vslei_h(v8i16 _1) { return __builtin_lsx_vslei_h(_1, 1); } + // CHECK-LABEL: @vslei_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vslei.w(<4 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vslei.w(<4 x i32> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x i32> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4i32 vslei_w(v4i32 _1) { return __builtin_lsx_vslei_w(_1, 1); } + // CHECK-LABEL: @vslei_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vslei.d(<2 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vslei.d(<2 x i64> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vslei_d(v2i64 _1) { return __builtin_lsx_vslei_d(_1, 1); } + // CHECK-LABEL: @vsle_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsle.bu(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsle.bu(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vsle_bu(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vsle_bu(_1, _2); + } + // CHECK-LABEL: @vsle_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsle.hu(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsle.hu(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vsle_hu(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vsle_hu(_1, _2); + } + // CHECK-LABEL: @vsle_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsle.wu(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsle.wu(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vsle_wu(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vsle_wu(_1, _2); + } + // CHECK-LABEL: @vsle_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsle.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* 
= tail call <2 x i64> @llvm.loongarch.lsx.vsle.du(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vsle_du(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vsle_du(_1, _2); + } + // CHECK-LABEL: @vslei_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vslei.bu(<16 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vslei.bu(<16 x i8> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <16 x i8> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v16i8 vslei_bu(v16u8 _1) { return __builtin_lsx_vslei_bu(_1, 1); } + // CHECK-LABEL: @vslei_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vslei.hu(<8 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vslei.hu(<8 x i16> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <8 x i16> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v8i16 vslei_hu(v8u16 _1) { return __builtin_lsx_vslei_hu(_1, 1); } + // CHECK-LABEL: @vslei_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vslei.wu(<4 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vslei.wu(<4 x i32> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x i32> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4i32 vslei_wu(v4u32 _1) { return __builtin_lsx_vslei_wu(_1, 1); } + // CHECK-LABEL: @vslei_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vslei.du(<2 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vslei.du(<2 x i64> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vslei_du(v2u64 _1) { return __builtin_lsx_vslei_du(_1, 1); } + // CHECK-LABEL: @vsat_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsat.b(<16 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsat.b(<16 x i8> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <16 x i8> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v16i8 vsat_b(v16i8 _1) { return __builtin_lsx_vsat_b(_1, 1); } + // CHECK-LABEL: @vsat_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsat.h(<8 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsat.h(<8 x i16> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <8 x i16> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v8i16 vsat_h(v8i16 _1) { return __builtin_lsx_vsat_h(_1, 1); } + // CHECK-LABEL: @vsat_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsat.w(<4 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x 
i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsat.w(<4 x i32> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x i32> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4i32 vsat_w(v4i32 _1) { return __builtin_lsx_vsat_w(_1, 1); } + // CHECK-LABEL: @vsat_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsat.d(<2 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsat.d(<2 x i64> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vsat_d(v2i64 _1) { return __builtin_lsx_vsat_d(_1, 1); } + // CHECK-LABEL: @vsat_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsat.bu(<16 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsat.bu(<16 x i8> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <16 x i8> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v16u8 vsat_bu(v16u8 _1) { return __builtin_lsx_vsat_bu(_1, 1); } + // CHECK-LABEL: @vsat_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsat.hu(<8 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsat.hu(<8 x i16> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <8 x i16> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v8u16 vsat_hu(v8u16 _1) { return __builtin_lsx_vsat_hu(_1, 1); } + // CHECK-LABEL: @vsat_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsat.wu(<4 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsat.wu(<4 x i32> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x i32> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4u32 vsat_wu(v4u32 _1) { return __builtin_lsx_vsat_wu(_1, 1); } + // CHECK-LABEL: @vsat_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsat.du(<2 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsat.du(<2 x i64> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2u64 vsat_du(v2u64 _1) { return __builtin_lsx_vsat_du(_1, 1); } + // CHECK-LABEL: @vadda_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vadda.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vadda.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vadda_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vadda_b(_1, _2); + } + // 
CHECK-LABEL: @vadda_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vadda.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vadda.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vadda_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vadda_h(_1, _2); + } + // CHECK-LABEL: @vadda_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vadda.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vadda.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vadda_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vadda_w(_1, _2); + } + // CHECK-LABEL: @vadda_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vadda.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vadda.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vadda_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vadda_d(_1, _2); + } + // CHECK-LABEL: @vsadd_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsadd.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsadd.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vsadd_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vsadd_b(_1, _2); + } + // CHECK-LABEL: @vsadd_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsadd.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsadd.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vsadd_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vsadd_h(_1, _2); + } + // CHECK-LABEL: @vsadd_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsadd.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsadd.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// 
CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vsadd_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vsadd_w(_1, _2); + } + // CHECK-LABEL: @vsadd_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsadd.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsadd.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vsadd_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vsadd_d(_1, _2); + } + // CHECK-LABEL: @vsadd_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsadd.bu(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsadd.bu(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16u8 vsadd_bu(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vsadd_bu(_1, _2); + } + // CHECK-LABEL: @vsadd_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsadd.hu(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsadd.hu(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8u16 vsadd_hu(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vsadd_hu(_1, _2); + } + // CHECK-LABEL: @vsadd_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsadd.wu(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsadd.wu(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4u32 vsadd_wu(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vsadd_wu(_1, _2); + } + // CHECK-LABEL: @vsadd_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsadd.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsadd.du(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2u64 vsadd_du(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vsadd_du(_1, _2); + } + // CHECK-LABEL: @vavg_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vavg.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to 
<16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vavg.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vavg_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vavg_b(_1, _2); } + // CHECK-LABEL: @vavg_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vavg.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vavg.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vavg_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vavg_h(_1, _2); } + // CHECK-LABEL: @vavg_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vavg.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vavg.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vavg_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vavg_w(_1, _2); } + // CHECK-LABEL: @vavg_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vavg.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vavg.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vavg_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vavg_d(_1, _2); } + // CHECK-LABEL: @vavg_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vavg.bu(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vavg.bu(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16u8 vavg_bu(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vavg_bu(_1, _2); + } + // CHECK-LABEL: @vavg_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vavg.hu(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vavg.hu(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8u16 vavg_hu(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vavg_hu(_1, _2); + } + // CHECK-LABEL: @vavg_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> 
@llvm.loongarch.lsx.vavg.wu(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vavg.wu(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4u32 vavg_wu(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vavg_wu(_1, _2); + } + // CHECK-LABEL: @vavg_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vavg.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vavg.du(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2u64 vavg_du(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vavg_du(_1, _2); + } + // CHECK-LABEL: @vavgr_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vavgr.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vavgr.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vavgr_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vavgr_b(_1, _2); + } + // CHECK-LABEL: @vavgr_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vavgr.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vavgr.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vavgr_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vavgr_h(_1, _2); + } + // CHECK-LABEL: @vavgr_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vavgr.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vavgr.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vavgr_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vavgr_w(_1, _2); + } + // CHECK-LABEL: @vavgr_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vavgr.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vavgr.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 
vavgr_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vavgr_d(_1, _2); + } + // CHECK-LABEL: @vavgr_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vavgr.bu(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vavgr.bu(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16u8 vavgr_bu(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vavgr_bu(_1, _2); + } + // CHECK-LABEL: @vavgr_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vavgr.hu(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vavgr.hu(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8u16 vavgr_hu(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vavgr_hu(_1, _2); + } + // CHECK-LABEL: @vavgr_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vavgr.wu(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vavgr.wu(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4u32 vavgr_wu(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vavgr_wu(_1, _2); + } + // CHECK-LABEL: @vavgr_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vavgr.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vavgr.du(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2u64 vavgr_du(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vavgr_du(_1, _2); + } + // CHECK-LABEL: @vssub_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssub.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssub.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vssub_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vssub_b(_1, _2); + } + // CHECK-LABEL: @vssub_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssub.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: 
TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssub.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vssub_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vssub_h(_1, _2); + } + // CHECK-LABEL: @vssub_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssub.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssub.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vssub_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vssub_w(_1, _2); + } + // CHECK-LABEL: @vssub_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vssub.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vssub.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vssub_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vssub_d(_1, _2); + } + // CHECK-LABEL: @vssub_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssub.bu(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssub.bu(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16u8 vssub_bu(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vssub_bu(_1, _2); + } + // CHECK-LABEL: @vssub_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssub.hu(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssub.hu(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8u16 vssub_hu(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vssub_hu(_1, _2); + } + // CHECK-LABEL: @vssub_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssub.wu(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssub.wu(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4u32 vssub_wu(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vssub_wu(_1, _2); + } + // CHECK-LABEL: @vssub_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vssub.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*) 
+-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vssub.du(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2u64 vssub_du(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vssub_du(_1, _2); + } + // CHECK-LABEL: @vabsd_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vabsd.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vabsd.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vabsd_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vabsd_b(_1, _2); + } + // CHECK-LABEL: @vabsd_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vabsd.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vabsd.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vabsd_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vabsd_h(_1, _2); + } + // CHECK-LABEL: @vabsd_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vabsd.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vabsd.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vabsd_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vabsd_w(_1, _2); + } + // CHECK-LABEL: @vabsd_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vabsd.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vabsd.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vabsd_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vabsd_d(_1, _2); + } + // CHECK-LABEL: @vabsd_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vabsd.bu(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vabsd.bu(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16u8 vabsd_bu(v16u8 _1, v16u8 _2) { + return 
__builtin_lsx_vabsd_bu(_1, _2); + } + // CHECK-LABEL: @vabsd_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vabsd.hu(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vabsd.hu(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8u16 vabsd_hu(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vabsd_hu(_1, _2); + } + // CHECK-LABEL: @vabsd_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vabsd.wu(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vabsd.wu(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4u32 vabsd_wu(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vabsd_wu(_1, _2); + } + // CHECK-LABEL: @vabsd_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vabsd.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vabsd.du(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2u64 vabsd_du(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vabsd_du(_1, _2); + } + // CHECK-LABEL: @vmul_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmul.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmul.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vmul_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vmul_b(_1, _2); } + // CHECK-LABEL: @vmul_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmul.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmul.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vmul_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vmul_h(_1, _2); } + // CHECK-LABEL: @vmul_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmul.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmul.w(<4 x 
i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vmul_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vmul_w(_1, _2); } + // CHECK-LABEL: @vmul_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmul.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmul.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vmul_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vmul_d(_1, _2); } + // CHECK-LABEL: @vmadd_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmadd.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*, <16 x i8> _3:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP3:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmadd.b(<16 x i8> TMP0, <16 x i8> TMP1, <16 x i8> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <16 x i8> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v16i8 vmadd_b(v16i8 _1, v16i8 _2, v16i8 _3) { + return __builtin_lsx_vmadd_b(_1, _2, _3); + } + // CHECK-LABEL: @vmadd_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmadd.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*, <8 x i16> _3:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP3:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmadd.h(<8 x i16> TMP0, <8 x i16> TMP1, <8 x i16> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <8 x i16> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v8i16 vmadd_h(v8i16 _1, v8i16 _2, v8i16 _3) { + return __builtin_lsx_vmadd_h(_1, _2, _3); + } + // CHECK-LABEL: @vmadd_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmadd.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*, <4 x i32> _3:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP3:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmadd.w(<4 x i32> TMP0, <4 x i32> TMP1, <4 x i32> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <4 x i32> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v4i32 vmadd_w(v4i32 _1, v4i32 _2, v4i32 _3) { + return __builtin_lsx_vmadd_w(_1, _2, _3); + } + // CHECK-LABEL: @vmadd_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmadd.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*, <2 x i64> _3:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP3:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmadd.d(<2 x i64> TMP0, <2 x i64> TMP1, <2 
x i64> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <2 x i64> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v2i64 vmadd_d(v2i64 _1, v2i64 _2, v2i64 _3) { + return __builtin_lsx_vmadd_d(_1, _2, _3); + } + // CHECK-LABEL: @vmsub_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmsub.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*, <16 x i8> _3:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP3:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmsub.b(<16 x i8> TMP0, <16 x i8> TMP1, <16 x i8> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <16 x i8> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v16i8 vmsub_b(v16i8 _1, v16i8 _2, v16i8 _3) { + return __builtin_lsx_vmsub_b(_1, _2, _3); + } + // CHECK-LABEL: @vmsub_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmsub.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*, <8 x i16> _3:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP3:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmsub.h(<8 x i16> TMP0, <8 x i16> TMP1, <8 x i16> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <8 x i16> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v8i16 vmsub_h(v8i16 _1, v8i16 _2, v8i16 _3) { + return __builtin_lsx_vmsub_h(_1, _2, _3); + } + // CHECK-LABEL: @vmsub_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmsub.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*, <4 x i32> _3:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP3:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmsub.w(<4 x i32> TMP0, <4 x i32> TMP1, <4 x i32> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <4 x i32> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v4i32 vmsub_w(v4i32 _1, v4i32 _2, v4i32 _3) { + return __builtin_lsx_vmsub_w(_1, _2, _3); + } + // CHECK-LABEL: @vmsub_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmsub.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*, <2 x i64> _3:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP3:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmsub.d(<2 x i64> TMP0, <2 x i64> TMP1, <2 x i64> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <2 x i64> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v2i64 vmsub_d(v2i64 _1, v2i64 _2, v2i64 _3) { + return __builtin_lsx_vmsub_d(_1, _2, _3); + } + // CHECK-LABEL: @vdiv_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vdiv.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> 
@llvm.loongarch.lsx.vdiv.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vdiv_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vdiv_b(_1, _2); } + // CHECK-LABEL: @vdiv_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vdiv.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vdiv.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vdiv_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vdiv_h(_1, _2); } + // CHECK-LABEL: @vdiv_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vdiv.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vdiv.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vdiv_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vdiv_w(_1, _2); } + // CHECK-LABEL: @vdiv_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vdiv.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vdiv.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vdiv_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vdiv_d(_1, _2); } + // CHECK-LABEL: @vdiv_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vdiv.bu(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vdiv.bu(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16u8 vdiv_bu(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vdiv_bu(_1, _2); + } + // CHECK-LABEL: @vdiv_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vdiv.hu(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vdiv.hu(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8u16 vdiv_hu(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vdiv_hu(_1, _2); + } + // CHECK-LABEL: @vdiv_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vdiv.wu(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 
_1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vdiv.wu(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4u32 vdiv_wu(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vdiv_wu(_1, _2); + } + // CHECK-LABEL: @vdiv_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vdiv.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vdiv.du(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2u64 vdiv_du(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vdiv_du(_1, _2); + } + // CHECK-LABEL: @vhaddw_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vhaddw.h.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vhaddw.h.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vhaddw_h_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vhaddw_h_b(_1, _2); + } + // CHECK-LABEL: @vhaddw_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vhaddw.w.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vhaddw.w.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vhaddw_w_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vhaddw_w_h(_1, _2); + } + // CHECK-LABEL: @vhaddw_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.d.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.d.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vhaddw_d_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vhaddw_d_w(_1, _2); + } + // CHECK-LABEL: @vhaddw_hu_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vhaddw.hu.bu(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vhaddw.hu.bu(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8u16 vhaddw_hu_bu(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vhaddw_hu_bu(_1, _2); + } 
+ // CHECK-LABEL: @vhaddw_wu_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vhaddw.wu.hu(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vhaddw.wu.hu(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4u32 vhaddw_wu_hu(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vhaddw_wu_hu(_1, _2); + } + // CHECK-LABEL: @vhaddw_du_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.du.wu(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.du.wu(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2u64 vhaddw_du_wu(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vhaddw_du_wu(_1, _2); + } + // CHECK-LABEL: @vhsubw_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vhsubw.h.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vhsubw.h.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vhsubw_h_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vhsubw_h_b(_1, _2); + } + // CHECK-LABEL: @vhsubw_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vhsubw.w.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vhsubw.w.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vhsubw_w_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vhsubw_w_h(_1, _2); + } + // CHECK-LABEL: @vhsubw_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.d.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.d.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vhsubw_d_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vhsubw_d_w(_1, _2); + } + // CHECK-LABEL: @vhsubw_hu_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vhsubw.hu.bu(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// 
CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vhsubw.hu.bu(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vhsubw_hu_bu(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vhsubw_hu_bu(_1, _2); + } + // CHECK-LABEL: @vhsubw_wu_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vhsubw.wu.hu(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vhsubw.wu.hu(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vhsubw_wu_hu(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vhsubw_wu_hu(_1, _2); + } + // CHECK-LABEL: @vhsubw_du_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.du.wu(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.du.wu(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vhsubw_du_wu(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vhsubw_du_wu(_1, _2); + } + // CHECK-LABEL: @vmod_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmod.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmod.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vmod_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vmod_b(_1, _2); } + // CHECK-LABEL: @vmod_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmod.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmod.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vmod_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vmod_h(_1, _2); } + // CHECK-LABEL: @vmod_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmod.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmod.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vmod_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vmod_w(_1, _2); } + // CHECK-LABEL: @vmod_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmod.d(<2 x 
i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmod.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vmod_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vmod_d(_1, _2); } + // CHECK-LABEL: @vmod_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmod.bu(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmod.bu(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16u8 vmod_bu(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vmod_bu(_1, _2); + } + // CHECK-LABEL: @vmod_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmod.hu(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmod.hu(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8u16 vmod_hu(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vmod_hu(_1, _2); + } + // CHECK-LABEL: @vmod_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmod.wu(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmod.wu(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4u32 vmod_wu(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vmod_wu(_1, _2); + } + // CHECK-LABEL: @vmod_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmod.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmod.du(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2u64 vmod_du(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vmod_du(_1, _2); + } + // CHECK-LABEL: @vreplve_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vreplve.b(<16 x i8> _1:%.*, i32 _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vreplve.b(<16 x i8> TMP0, i32 _2:%.*) ++// CHECK-NEXT: TMP2:%.* = bitcast <16 x i8> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v16i8 vreplve_b(v16i8 _1, int _2) { + return __builtin_lsx_vreplve_b(_1, _2); + } + // CHECK-LABEL: @vreplve_h( + // 
CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vreplve.h(<8 x i16> _1:%.*, i32 _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vreplve.h(<8 x i16> TMP0, i32 _2:%.*) ++// CHECK-NEXT: TMP2:%.* = bitcast <8 x i16> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v8i16 vreplve_h(v8i16 _1, int _2) { + return __builtin_lsx_vreplve_h(_1, _2); + } + // CHECK-LABEL: @vreplve_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vreplve.w(<4 x i32> _1:%.*, i32 _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vreplve.w(<4 x i32> TMP0, i32 _2:%.*) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x i32> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4i32 vreplve_w(v4i32 _1, int _2) { + return __builtin_lsx_vreplve_w(_1, _2); + } + // CHECK-LABEL: @vreplve_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vreplve.d(<2 x i64> _1:%.*, i32 _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vreplve.d(<2 x i64> TMP0, i32 _2:%.*) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vreplve_d(v2i64 _1, int _2) { + return __builtin_lsx_vreplve_d(_1, _2); + } + // CHECK-LABEL: @vreplvei_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vreplvei.b(<16 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vreplvei.b(<16 x i8> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <16 x i8> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v16i8 vreplvei_b(v16i8 _1) { return __builtin_lsx_vreplvei_b(_1, 1); } + // CHECK-LABEL: @vreplvei_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vreplvei.h(<8 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vreplvei.h(<8 x i16> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <8 x i16> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v8i16 vreplvei_h(v8i16 _1) { return __builtin_lsx_vreplvei_h(_1, 1); } + // CHECK-LABEL: @vreplvei_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vreplvei.w(<4 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vreplvei.w(<4 x i32> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x i32> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4i32 vreplvei_w(v4i32 _1) { return __builtin_lsx_vreplvei_w(_1, 1); } + // CHECK-LABEL: @vreplvei_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vreplvei.d(<2 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> 
@llvm.loongarch.lsx.vreplvei.d(<2 x i64> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vreplvei_d(v2i64 _1) { return __builtin_lsx_vreplvei_d(_1, 1); } + // CHECK-LABEL: @vpickev_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vpickev.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vpickev.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vpickev_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vpickev_b(_1, _2); + } + // CHECK-LABEL: @vpickev_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vpickev.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vpickev.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vpickev_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vpickev_h(_1, _2); + } + // CHECK-LABEL: @vpickev_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vpickev.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vpickev.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vpickev_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vpickev_w(_1, _2); + } + // CHECK-LABEL: @vpickev_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vpickev.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vpickev.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vpickev_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vpickev_d(_1, _2); + } + // CHECK-LABEL: @vpickod_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vpickod.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vpickod.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vpickod_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vpickod_b(_1, _2); + } + // CHECK-LABEL: @vpickod_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vpickod.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// 
CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vpickod.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vpickod_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vpickod_h(_1, _2); + } + // CHECK-LABEL: @vpickod_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vpickod.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vpickod.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vpickod_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vpickod_w(_1, _2); + } + // CHECK-LABEL: @vpickod_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vpickod.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vpickod.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vpickod_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vpickod_d(_1, _2); + } + // CHECK-LABEL: @vilvh_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vilvh.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vilvh.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vilvh_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vilvh_b(_1, _2); + } + // CHECK-LABEL: @vilvh_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vilvh.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vilvh.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vilvh_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vilvh_h(_1, _2); + } + // CHECK-LABEL: @vilvh_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vilvh.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vilvh.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vilvh_w(v4i32 _1, v4i32 _2) { + return 
__builtin_lsx_vilvh_w(_1, _2); + } + // CHECK-LABEL: @vilvh_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vilvh.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vilvh.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vilvh_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vilvh_d(_1, _2); + } + // CHECK-LABEL: @vilvl_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vilvl.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vilvl.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vilvl_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vilvl_b(_1, _2); + } + // CHECK-LABEL: @vilvl_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vilvl.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vilvl.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vilvl_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vilvl_h(_1, _2); + } + // CHECK-LABEL: @vilvl_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vilvl.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vilvl.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vilvl_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vilvl_w(_1, _2); + } + // CHECK-LABEL: @vilvl_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vilvl.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vilvl.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vilvl_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vilvl_d(_1, _2); + } + // CHECK-LABEL: @vpackev_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vpackev.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> 
@llvm.loongarch.lsx.vpackev.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vpackev_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vpackev_b(_1, _2); + } + // CHECK-LABEL: @vpackev_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vpackev.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vpackev.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vpackev_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vpackev_h(_1, _2); + } + // CHECK-LABEL: @vpackev_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vpackev.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vpackev.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vpackev_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vpackev_w(_1, _2); + } + // CHECK-LABEL: @vpackev_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vpackev.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vpackev.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vpackev_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vpackev_d(_1, _2); + } + // CHECK-LABEL: @vpackod_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vpackod.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vpackod.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vpackod_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vpackod_b(_1, _2); + } + // CHECK-LABEL: @vpackod_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vpackod.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vpackod.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vpackod_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vpackod_h(_1, _2); + } + // CHECK-LABEL: @vpackod_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vpackod.w(<4 x i32> _1:%.*, <4 x 
i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vpackod.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vpackod_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vpackod_w(_1, _2); + } + // CHECK-LABEL: @vpackod_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vpackod.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vpackod.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vpackod_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vpackod_d(_1, _2); + } + // CHECK-LABEL: @vshuf_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vshuf.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*, <8 x i16> _3:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP3:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vshuf.h(<8 x i16> TMP0, <8 x i16> TMP1, <8 x i16> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <8 x i16> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v8i16 vshuf_h(v8i16 _1, v8i16 _2, v8i16 _3) { + return __builtin_lsx_vshuf_h(_1, _2, _3); + } + // CHECK-LABEL: @vshuf_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vshuf.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*, <4 x i32> _3:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP3:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vshuf.w(<4 x i32> TMP0, <4 x i32> TMP1, <4 x i32> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <4 x i32> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v4i32 vshuf_w(v4i32 _1, v4i32 _2, v4i32 _3) { + return __builtin_lsx_vshuf_w(_1, _2, _3); + } + // CHECK-LABEL: @vshuf_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vshuf.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*, <2 x i64> _3:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP3:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vshuf.d(<2 x i64> TMP0, <2 x i64> TMP1, <2 x i64> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <2 x i64> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v2i64 vshuf_d(v2i64 _1, v2i64 _2, v2i64 _3) { + return __builtin_lsx_vshuf_d(_1, _2, _3); + } + // CHECK-LABEL: @vand_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vand.v(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast 
i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vand.v(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16u8 vand_v(v16u8 _1, v16u8 _2) { return __builtin_lsx_vand_v(_1, _2); } + // CHECK-LABEL: @vandi_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vandi.b(<16 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vandi.b(<16 x i8> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <16 x i8> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v16u8 vandi_b(v16u8 _1) { return __builtin_lsx_vandi_b(_1, 1); } + // CHECK-LABEL: @vor_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vor.v(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vor.v(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16u8 vor_v(v16u8 _1, v16u8 _2) { return __builtin_lsx_vor_v(_1, _2); } + // CHECK-LABEL: @vori_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vori.b(<16 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vori.b(<16 x i8> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <16 x i8> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v16u8 vori_b(v16u8 _1) { return __builtin_lsx_vori_b(_1, 1); } + // CHECK-LABEL: @vnor_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vnor.v(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vnor.v(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16u8 vnor_v(v16u8 _1, v16u8 _2) { return __builtin_lsx_vnor_v(_1, _2); } + // CHECK-LABEL: @vnori_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vnori.b(<16 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vnori.b(<16 x i8> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <16 x i8> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v16u8 vnori_b(v16u8 _1) { return __builtin_lsx_vnori_b(_1, 1); } + // CHECK-LABEL: @vxor_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vxor.v(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> 
@llvm.loongarch.lsx.vxor.v(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16u8 vxor_v(v16u8 _1, v16u8 _2) { return __builtin_lsx_vxor_v(_1, _2); } + // CHECK-LABEL: @vxori_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vxori.b(<16 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vxori.b(<16 x i8> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <16 x i8> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v16u8 vxori_b(v16u8 _1) { return __builtin_lsx_vxori_b(_1, 1); } + // CHECK-LABEL: @vbitsel_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vbitsel.v(<16 x i8> _1:%.*, <16 x i8> _2:%.*, <16 x i8> _3:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP3:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vbitsel.v(<16 x i8> TMP0, <16 x i8> TMP1, <16 x i8> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <16 x i8> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v16u8 vbitsel_v(v16u8 _1, v16u8 _2, v16u8 _3) { + return __builtin_lsx_vbitsel_v(_1, _2, _3); + } + // CHECK-LABEL: @vbitseli_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vbitseli.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vbitseli.b(<16 x i8> TMP0, <16 x i8> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16u8 vbitseli_b(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vbitseli_b(_1, _2, 1); + } + // CHECK-LABEL: @vshuf4i_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vshuf4i.b(<16 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vshuf4i.b(<16 x i8> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <16 x i8> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v16i8 vshuf4i_b(v16i8 _1) { return __builtin_lsx_vshuf4i_b(_1, 1); } + // CHECK-LABEL: @vshuf4i_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vshuf4i.h(<8 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vshuf4i.h(<8 x i16> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <8 x i16> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v8i16 vshuf4i_h(v8i16 _1) { return __builtin_lsx_vshuf4i_h(_1, 1); } + // CHECK-LABEL: @vshuf4i_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vshuf4i.w(<4 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vshuf4i.w(<4 x i32> 
TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x i32> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4i32 vshuf4i_w(v4i32 _1) { return __builtin_lsx_vshuf4i_w(_1, 1); } + // CHECK-LABEL: @vreplgr2vr_b( + // CHECK-NEXT: entry: + // CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vreplgr2vr.b(i32 _1:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP1:%.* = bitcast <16 x i8> TMP0 to i128 ++// CHECK-NEXT: ret i128 TMP1 + // + v16i8 vreplgr2vr_b(int _1) { return __builtin_lsx_vreplgr2vr_b(_1); } + // CHECK-LABEL: @vreplgr2vr_h( + // CHECK-NEXT: entry: + // CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vreplgr2vr.h(i32 _1:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP1:%.* = bitcast <8 x i16> TMP0 to i128 ++// CHECK-NEXT: ret i128 TMP1 + // + v8i16 vreplgr2vr_h(int _1) { return __builtin_lsx_vreplgr2vr_h(_1); } + // CHECK-LABEL: @vreplgr2vr_w( + // CHECK-NEXT: entry: + // CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vreplgr2vr.w(i32 _1:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP1:%.* = bitcast <4 x i32> TMP0 to i128 ++// CHECK-NEXT: ret i128 TMP1 + // + v4i32 vreplgr2vr_w(int _1) { return __builtin_lsx_vreplgr2vr_w(_1); } + // CHECK-LABEL: @vreplgr2vr_d( + // CHECK-NEXT: entry: + // CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vreplgr2vr.d(i64 _1:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP1:%.* = bitcast <2 x i64> TMP0 to i128 ++// CHECK-NEXT: ret i128 TMP1 + // + v2i64 vreplgr2vr_d(long _1) { return __builtin_lsx_vreplgr2vr_d(_1); } + // CHECK-LABEL: @vpcnt_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vpcnt.b(<16 x i8> _1:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vpcnt.b(<16 x i8> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <16 x i8> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v16i8 vpcnt_b(v16i8 _1) { return __builtin_lsx_vpcnt_b(_1); } + // CHECK-LABEL: @vpcnt_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vpcnt.h(<8 x i16> _1:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vpcnt.h(<8 x i16> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <8 x i16> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v8i16 vpcnt_h(v8i16 _1) { return __builtin_lsx_vpcnt_h(_1); } + // CHECK-LABEL: @vpcnt_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vpcnt.w(<4 x i32> _1:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vpcnt.w(<4 x i32> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x i32> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4i32 vpcnt_w(v4i32 _1) { return __builtin_lsx_vpcnt_w(_1); } + // CHECK-LABEL: @vpcnt_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vpcnt.d(<2 x i64> _1:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vpcnt.d(<2 x i64> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vpcnt_d(v2i64 
_1) { return __builtin_lsx_vpcnt_d(_1); } + // CHECK-LABEL: @vclo_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vclo.b(<16 x i8> _1:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vclo.b(<16 x i8> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <16 x i8> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v16i8 vclo_b(v16i8 _1) { return __builtin_lsx_vclo_b(_1); } + // CHECK-LABEL: @vclo_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vclo.h(<8 x i16> _1:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vclo.h(<8 x i16> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <8 x i16> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v8i16 vclo_h(v8i16 _1) { return __builtin_lsx_vclo_h(_1); } + // CHECK-LABEL: @vclo_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vclo.w(<4 x i32> _1:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vclo.w(<4 x i32> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x i32> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4i32 vclo_w(v4i32 _1) { return __builtin_lsx_vclo_w(_1); } + // CHECK-LABEL: @vclo_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vclo.d(<2 x i64> _1:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vclo.d(<2 x i64> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vclo_d(v2i64 _1) { return __builtin_lsx_vclo_d(_1); } + // CHECK-LABEL: @vclz_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vclz.b(<16 x i8> _1:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vclz.b(<16 x i8> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <16 x i8> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v16i8 vclz_b(v16i8 _1) { return __builtin_lsx_vclz_b(_1); } + // CHECK-LABEL: @vclz_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vclz.h(<8 x i16> _1:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vclz.h(<8 x i16> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <8 x i16> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v8i16 vclz_h(v8i16 _1) { return __builtin_lsx_vclz_h(_1); } + // CHECK-LABEL: @vclz_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vclz.w(<4 x i32> _1:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vclz.w(<4 x i32> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x i32> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4i32 vclz_w(v4i32 _1) { return __builtin_lsx_vclz_w(_1); } + // CHECK-LABEL: @vclz_d( + // 
CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vclz.d(<2 x i64> _1:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vclz.d(<2 x i64> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vclz_d(v2i64 _1) { return __builtin_lsx_vclz_d(_1); } + // CHECK-LABEL: @vpickve2gr_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret i32 TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call i32 @llvm.loongarch.lsx.vpickve2gr.b(<16 x i8> TMP0, i32 1) ++// CHECK-NEXT: ret i32 TMP1 + // + int vpickve2gr_b(v16i8 _1) { return __builtin_lsx_vpickve2gr_b(_1, 1); } + // CHECK-LABEL: @vpickve2gr_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lsx.vpickve2gr.h(<8 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret i32 TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = tail call i32 @llvm.loongarch.lsx.vpickve2gr.h(<8 x i16> TMP0, i32 1) ++// CHECK-NEXT: ret i32 TMP1 + // + int vpickve2gr_h(v8i16 _1) { return __builtin_lsx_vpickve2gr_h(_1, 1); } + // CHECK-LABEL: @vpickve2gr_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lsx.vpickve2gr.w(<4 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret i32 TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = tail call i32 @llvm.loongarch.lsx.vpickve2gr.w(<4 x i32> TMP0, i32 1) ++// CHECK-NEXT: ret i32 TMP1 + // + int vpickve2gr_w(v4i32 _1) { return __builtin_lsx_vpickve2gr_w(_1, 1); } + // CHECK-LABEL: @vpickve2gr_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret i64 TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = tail call i64 @llvm.loongarch.lsx.vpickve2gr.d(<2 x i64> TMP0, i32 1) ++// CHECK-NEXT: ret i64 TMP1 + // + long vpickve2gr_d(v2i64 _1) { return __builtin_lsx_vpickve2gr_d(_1, 1); } + // CHECK-LABEL: @vpickve2gr_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lsx.vpickve2gr.bu(<16 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret i32 TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call i32 @llvm.loongarch.lsx.vpickve2gr.bu(<16 x i8> TMP0, i32 1) ++// CHECK-NEXT: ret i32 TMP1 + // + unsigned int vpickve2gr_bu(v16i8 _1) { + return __builtin_lsx_vpickve2gr_bu(_1, 1); + } + // CHECK-LABEL: @vpickve2gr_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lsx.vpickve2gr.hu(<8 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret i32 TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = tail call i32 @llvm.loongarch.lsx.vpickve2gr.hu(<8 x i16> TMP0, i32 1) ++// CHECK-NEXT: ret i32 TMP1 + // + unsigned int vpickve2gr_hu(v8i16 _1) { + return __builtin_lsx_vpickve2gr_hu(_1, 1); + } + // CHECK-LABEL: @vpickve2gr_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lsx.vpickve2gr.wu(<4 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret i32 TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: 
TMP1:%.* = tail call i32 @llvm.loongarch.lsx.vpickve2gr.wu(<4 x i32> TMP0, i32 1) ++// CHECK-NEXT: ret i32 TMP1 + // + unsigned int vpickve2gr_wu(v4i32 _1) { + return __builtin_lsx_vpickve2gr_wu(_1, 1); + } + // CHECK-LABEL: @vpickve2gr_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret i64 TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = tail call i64 @llvm.loongarch.lsx.vpickve2gr.du(<2 x i64> TMP0, i32 1) ++// CHECK-NEXT: ret i64 TMP1 + // + unsigned long int vpickve2gr_du(v2i64 _1) { + return __builtin_lsx_vpickve2gr_du(_1, 1); + } + // CHECK-LABEL: @vinsgr2vr_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b(<16 x i8> _1:%.*, i32 1, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vinsgr2vr.b(<16 x i8> TMP0, i32 1, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <16 x i8> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v16i8 vinsgr2vr_b(v16i8 _1) { + return __builtin_lsx_vinsgr2vr_b(_1, 1, 1); + } + // CHECK-LABEL: @vinsgr2vr_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vinsgr2vr.h(<8 x i16> _1:%.*, i32 1, i32 1) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vinsgr2vr.h(<8 x i16> TMP0, i32 1, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <8 x i16> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v8i16 vinsgr2vr_h(v8i16 _1) { + return __builtin_lsx_vinsgr2vr_h(_1, 1, 1); + } + // CHECK-LABEL: @vinsgr2vr_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vinsgr2vr.w(<4 x i32> _1:%.*, i32 1, i32 1) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vinsgr2vr.w(<4 x i32> TMP0, i32 1, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x i32> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4i32 vinsgr2vr_w(v4i32 _1) { + return __builtin_lsx_vinsgr2vr_w(_1, 1, 1); + } + // CHECK-LABEL: @vinsgr2vr_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d(<2 x i64> _1:%.*, i64 1, i32 1) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vinsgr2vr.d(<2 x i64> TMP0, i64 1, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vinsgr2vr_d(v2i64 _1) { + return __builtin_lsx_vinsgr2vr_d(_1, 1, 1); + } + // CHECK-LABEL: @vfadd_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfadd.s(<4 x float> _1:%.*, <4 x float> _2:%.*) +-// CHECK-NEXT: ret <4 x float> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfadd.s(<4 x float> TMP0, <4 x float> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x float> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4f32 vfadd_s(v4f32 _1, v4f32 _2) { + return 
__builtin_lsx_vfadd_s(_1, _2); + } + // CHECK-LABEL: @vfadd_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfadd.d(<2 x double> _1:%.*, <2 x double> _2:%.*) +-// CHECK-NEXT: ret <2 x double> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfadd.d(<2 x double> TMP0, <2 x double> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x double> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2f64 vfadd_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfadd_d(_1, _2); + } + // CHECK-LABEL: @vfsub_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfsub.s(<4 x float> _1:%.*, <4 x float> _2:%.*) +-// CHECK-NEXT: ret <4 x float> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfsub.s(<4 x float> TMP0, <4 x float> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x float> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4f32 vfsub_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfsub_s(_1, _2); + } + // CHECK-LABEL: @vfsub_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfsub.d(<2 x double> _1:%.*, <2 x double> _2:%.*) +-// CHECK-NEXT: ret <2 x double> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfsub.d(<2 x double> TMP0, <2 x double> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x double> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2f64 vfsub_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfsub_d(_1, _2); + } + // CHECK-LABEL: @vfmul_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfmul.s(<4 x float> _1:%.*, <4 x float> _2:%.*) +-// CHECK-NEXT: ret <4 x float> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfmul.s(<4 x float> TMP0, <4 x float> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x float> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4f32 vfmul_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfmul_s(_1, _2); + } + // CHECK-LABEL: @vfmul_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfmul.d(<2 x double> _1:%.*, <2 x double> _2:%.*) +-// CHECK-NEXT: ret <2 x double> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfmul.d(<2 x double> TMP0, <2 x double> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x double> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2f64 vfmul_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfmul_d(_1, _2); + } + // CHECK-LABEL: @vfdiv_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfdiv.s(<4 x float> _1:%.*, <4 x float> _2:%.*) +-// CHECK-NEXT: ret <4 x float> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// 
CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfdiv.s(<4 x float> TMP0, <4 x float> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x float> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4f32 vfdiv_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfdiv_s(_1, _2); + } + // CHECK-LABEL: @vfdiv_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfdiv.d(<2 x double> _1:%.*, <2 x double> _2:%.*) +-// CHECK-NEXT: ret <2 x double> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfdiv.d(<2 x double> TMP0, <2 x double> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x double> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2f64 vfdiv_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfdiv_d(_1, _2); + } + // CHECK-LABEL: @vfcvt_h_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vfcvt.h.s(<4 x float> _1:%.*, <4 x float> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vfcvt.h.s(<4 x float> TMP0, <4 x float> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vfcvt_h_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcvt_h_s(_1, _2); + } + // CHECK-LABEL: @vfcvt_s_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfcvt.s.d(<2 x double> _1:%.*, <2 x double> _2:%.*) +-// CHECK-NEXT: ret <4 x float> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfcvt.s.d(<2 x double> TMP0, <2 x double> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x float> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4f32 vfcvt_s_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcvt_s_d(_1, _2); + } + // CHECK-LABEL: @vfmin_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfmin.s(<4 x float> _1:%.*, <4 x float> _2:%.*) +-// CHECK-NEXT: ret <4 x float> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfmin.s(<4 x float> TMP0, <4 x float> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x float> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4f32 vfmin_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfmin_s(_1, _2); + } + // CHECK-LABEL: @vfmin_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfmin.d(<2 x double> _1:%.*, <2 x double> _2:%.*) +-// CHECK-NEXT: ret <2 x double> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfmin.d(<2 x double> TMP0, <2 x double> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x double> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2f64 vfmin_d(v2f64 _1, v2f64 _2) 
{ + return __builtin_lsx_vfmin_d(_1, _2); + } + // CHECK-LABEL: @vfmina_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfmina.s(<4 x float> _1:%.*, <4 x float> _2:%.*) +-// CHECK-NEXT: ret <4 x float> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfmina.s(<4 x float> TMP0, <4 x float> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x float> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4f32 vfmina_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfmina_s(_1, _2); + } + // CHECK-LABEL: @vfmina_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfmina.d(<2 x double> _1:%.*, <2 x double> _2:%.*) +-// CHECK-NEXT: ret <2 x double> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfmina.d(<2 x double> TMP0, <2 x double> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x double> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2f64 vfmina_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfmina_d(_1, _2); + } + // CHECK-LABEL: @vfmax_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfmax.s(<4 x float> _1:%.*, <4 x float> _2:%.*) +-// CHECK-NEXT: ret <4 x float> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfmax.s(<4 x float> TMP0, <4 x float> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x float> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4f32 vfmax_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfmax_s(_1, _2); + } + // CHECK-LABEL: @vfmax_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfmax.d(<2 x double> _1:%.*, <2 x double> _2:%.*) +-// CHECK-NEXT: ret <2 x double> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfmax.d(<2 x double> TMP0, <2 x double> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x double> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2f64 vfmax_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfmax_d(_1, _2); + } + // CHECK-LABEL: @vfmaxa_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfmaxa.s(<4 x float> _1:%.*, <4 x float> _2:%.*) +-// CHECK-NEXT: ret <4 x float> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfmaxa.s(<4 x float> TMP0, <4 x float> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x float> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4f32 vfmaxa_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfmaxa_s(_1, _2); + } + // CHECK-LABEL: @vfmaxa_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfmaxa.d(<2 x double> _1:%.*, <2 x double> _2:%.*) +-// CHECK-NEXT: ret <2 x double> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* 
to <2 x double> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfmaxa.d(<2 x double> TMP0, <2 x double> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x double> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2f64 vfmaxa_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfmaxa_d(_1, _2); + } + // CHECK-LABEL: @vfclass_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfclass.s(<4 x float> _1:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfclass.s(<4 x float> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x i32> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4i32 vfclass_s(v4f32 _1) { return __builtin_lsx_vfclass_s(_1); } + // CHECK-LABEL: @vfclass_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfclass.d(<2 x double> _1:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfclass.d(<2 x double> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vfclass_d(v2f64 _1) { return __builtin_lsx_vfclass_d(_1); } + // CHECK-LABEL: @vfsqrt_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfsqrt.s(<4 x float> _1:%.*) +-// CHECK-NEXT: ret <4 x float> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfsqrt.s(<4 x float> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x float> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4f32 vfsqrt_s(v4f32 _1) { return __builtin_lsx_vfsqrt_s(_1); } + // CHECK-LABEL: @vfsqrt_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfsqrt.d(<2 x double> _1:%.*) +-// CHECK-NEXT: ret <2 x double> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfsqrt.d(<2 x double> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x double> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2f64 vfsqrt_d(v2f64 _1) { return __builtin_lsx_vfsqrt_d(_1); } + // CHECK-LABEL: @vfrecip_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfrecip.s(<4 x float> _1:%.*) +-// CHECK-NEXT: ret <4 x float> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfrecip.s(<4 x float> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x float> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4f32 vfrecip_s(v4f32 _1) { return __builtin_lsx_vfrecip_s(_1); } + // CHECK-LABEL: @vfrecip_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfrecip.d(<2 x double> _1:%.*) +-// CHECK-NEXT: ret <2 x double> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfrecip.d(<2 x double> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x double> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2f64 vfrecip_d(v2f64 _1) { return __builtin_lsx_vfrecip_d(_1); } + // CHECK-LABEL: 
@vfrint_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfrint.s(<4 x float> _1:%.*) +-// CHECK-NEXT: ret <4 x float> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfrint.s(<4 x float> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x float> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4f32 vfrint_s(v4f32 _1) { return __builtin_lsx_vfrint_s(_1); } + // CHECK-LABEL: @vfrint_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfrint.d(<2 x double> _1:%.*) +-// CHECK-NEXT: ret <2 x double> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfrint.d(<2 x double> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x double> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2f64 vfrint_d(v2f64 _1) { return __builtin_lsx_vfrint_d(_1); } + // CHECK-LABEL: @vfrsqrt_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfrsqrt.s(<4 x float> _1:%.*) +-// CHECK-NEXT: ret <4 x float> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfrsqrt.s(<4 x float> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x float> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4f32 vfrsqrt_s(v4f32 _1) { return __builtin_lsx_vfrsqrt_s(_1); } + // CHECK-LABEL: @vfrsqrt_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfrsqrt.d(<2 x double> _1:%.*) +-// CHECK-NEXT: ret <2 x double> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfrsqrt.d(<2 x double> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x double> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2f64 vfrsqrt_d(v2f64 _1) { return __builtin_lsx_vfrsqrt_d(_1); } + // CHECK-LABEL: @vflogb_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vflogb.s(<4 x float> _1:%.*) +-// CHECK-NEXT: ret <4 x float> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x float> @llvm.loongarch.lsx.vflogb.s(<4 x float> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x float> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4f32 vflogb_s(v4f32 _1) { return __builtin_lsx_vflogb_s(_1); } + // CHECK-LABEL: @vflogb_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vflogb.d(<2 x double> _1:%.*) +-// CHECK-NEXT: ret <2 x double> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x double> @llvm.loongarch.lsx.vflogb.d(<2 x double> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x double> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2f64 vflogb_d(v2f64 _1) { return __builtin_lsx_vflogb_d(_1); } + // CHECK-LABEL: @vfcvth_s_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfcvth.s.h(<8 x i16> _1:%.*) +-// CHECK-NEXT: ret <4 x float> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfcvth.s.h(<8 x i16> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x float> TMP1 to i128 
++// CHECK-NEXT: ret i128 TMP2 + // + v4f32 vfcvth_s_h(v8i16 _1) { return __builtin_lsx_vfcvth_s_h(_1); } + // CHECK-LABEL: @vfcvth_d_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfcvth.d.s(<4 x float> _1:%.*) +-// CHECK-NEXT: ret <2 x double> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfcvth.d.s(<4 x float> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x double> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2f64 vfcvth_d_s(v4f32 _1) { return __builtin_lsx_vfcvth_d_s(_1); } + // CHECK-LABEL: @vfcvtl_s_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfcvtl.s.h(<8 x i16> _1:%.*) +-// CHECK-NEXT: ret <4 x float> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfcvtl.s.h(<8 x i16> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x float> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4f32 vfcvtl_s_h(v8i16 _1) { return __builtin_lsx_vfcvtl_s_h(_1); } + // CHECK-LABEL: @vfcvtl_d_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfcvtl.d.s(<4 x float> _1:%.*) +-// CHECK-NEXT: ret <2 x double> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfcvtl.d.s(<4 x float> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x double> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2f64 vfcvtl_d_s(v4f32 _1) { return __builtin_lsx_vfcvtl_d_s(_1); } + // CHECK-LABEL: @vftint_w_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vftint.w.s(<4 x float> _1:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vftint.w.s(<4 x float> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x i32> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4i32 vftint_w_s(v4f32 _1) { return __builtin_lsx_vftint_w_s(_1); } + // CHECK-LABEL: @vftint_l_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftint.l.d(<2 x double> _1:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftint.l.d(<2 x double> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vftint_l_d(v2f64 _1) { return __builtin_lsx_vftint_l_d(_1); } + // CHECK-LABEL: @vftint_wu_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vftint.wu.s(<4 x float> _1:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vftint.wu.s(<4 x float> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x i32> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4u32 vftint_wu_s(v4f32 _1) { return __builtin_lsx_vftint_wu_s(_1); } + // CHECK-LABEL: @vftint_lu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftint.lu.d(<2 x double> _1:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: 
TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftint.lu.d(<2 x double> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2u64 vftint_lu_d(v2f64 _1) { return __builtin_lsx_vftint_lu_d(_1); } + // CHECK-LABEL: @vftintrz_w_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.s(<4 x float> _1:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.s(<4 x float> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x i32> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4i32 vftintrz_w_s(v4f32 _1) { return __builtin_lsx_vftintrz_w_s(_1); } + // CHECK-LABEL: @vftintrz_l_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftintrz.l.d(<2 x double> _1:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftintrz.l.d(<2 x double> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vftintrz_l_d(v2f64 _1) { return __builtin_lsx_vftintrz_l_d(_1); } + // CHECK-LABEL: @vftintrz_wu_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.wu.s(<4 x float> _1:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.wu.s(<4 x float> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x i32> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4u32 vftintrz_wu_s(v4f32 _1) { return __builtin_lsx_vftintrz_wu_s(_1); } + // CHECK-LABEL: @vftintrz_lu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftintrz.lu.d(<2 x double> _1:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftintrz.lu.d(<2 x double> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2u64 vftintrz_lu_d(v2f64 _1) { return __builtin_lsx_vftintrz_lu_d(_1); } + // CHECK-LABEL: @vffint_s_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.w(<4 x i32> _1:%.*) +-// CHECK-NEXT: ret <4 x float> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.w(<4 x i32> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x float> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4f32 vffint_s_w(v4i32 _1) { return __builtin_lsx_vffint_s_w(_1); } + // CHECK-LABEL: @vffint_d_l( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vffint.d.l(<2 x i64> _1:%.*) +-// CHECK-NEXT: ret <2 x double> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x double> @llvm.loongarch.lsx.vffint.d.l(<2 x i64> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x double> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2f64 vffint_d_l(v2i64 _1) { return __builtin_lsx_vffint_d_l(_1); } + // CHECK-LABEL: @vffint_s_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x float> 
@llvm.loongarch.lsx.vffint.s.wu(<4 x i32> _1:%.*) +-// CHECK-NEXT: ret <4 x float> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.wu(<4 x i32> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x float> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4f32 vffint_s_wu(v4u32 _1) { return __builtin_lsx_vffint_s_wu(_1); } + // CHECK-LABEL: @vffint_d_lu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vffint.d.lu(<2 x i64> _1:%.*) +-// CHECK-NEXT: ret <2 x double> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x double> @llvm.loongarch.lsx.vffint.d.lu(<2 x i64> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x double> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2f64 vffint_d_lu(v2u64 _1) { return __builtin_lsx_vffint_d_lu(_1); } + // CHECK-LABEL: @vandn_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vandn.v(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vandn.v(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16u8 vandn_v(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vandn_v(_1, _2); + } + // CHECK-LABEL: @vneg_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vneg.b(<16 x i8> _1:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vneg.b(<16 x i8> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <16 x i8> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v16i8 vneg_b(v16i8 _1) { return __builtin_lsx_vneg_b(_1); } + // CHECK-LABEL: @vneg_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vneg.h(<8 x i16> _1:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vneg.h(<8 x i16> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <8 x i16> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v8i16 vneg_h(v8i16 _1) { return __builtin_lsx_vneg_h(_1); } + // CHECK-LABEL: @vneg_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vneg.w(<4 x i32> _1:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vneg.w(<4 x i32> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x i32> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4i32 vneg_w(v4i32 _1) { return __builtin_lsx_vneg_w(_1); } + // CHECK-LABEL: @vneg_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vneg.d(<2 x i64> _1:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vneg.d(<2 x i64> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vneg_d(v2i64 _1) { return 
__builtin_lsx_vneg_d(_1); } + // CHECK-LABEL: @vmuh_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmuh.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmuh.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vmuh_b(v16i8 _1, v16i8 _2) { return __builtin_lsx_vmuh_b(_1, _2); } + // CHECK-LABEL: @vmuh_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmuh.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmuh.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vmuh_h(v8i16 _1, v8i16 _2) { return __builtin_lsx_vmuh_h(_1, _2); } + // CHECK-LABEL: @vmuh_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmuh.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmuh.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vmuh_w(v4i32 _1, v4i32 _2) { return __builtin_lsx_vmuh_w(_1, _2); } + // CHECK-LABEL: @vmuh_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmuh.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmuh.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vmuh_d(v2i64 _1, v2i64 _2) { return __builtin_lsx_vmuh_d(_1, _2); } + // CHECK-LABEL: @vmuh_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmuh.bu(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmuh.bu(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16u8 vmuh_bu(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vmuh_bu(_1, _2); + } + // CHECK-LABEL: @vmuh_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmuh.hu(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmuh.hu(<8 x i16> TMP0, <8 x i16> TMP1) ++// 
CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8u16 vmuh_hu(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vmuh_hu(_1, _2); + } + // CHECK-LABEL: @vmuh_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmuh.wu(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmuh.wu(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4u32 vmuh_wu(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vmuh_wu(_1, _2); + } + // CHECK-LABEL: @vmuh_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmuh.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmuh.du(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2u64 vmuh_du(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vmuh_du(_1, _2); + } + // CHECK-LABEL: @vsllwil_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b(<16 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsllwil.h.b(<16 x i8> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <8 x i16> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v8i16 vsllwil_h_b(v16i8 _1) { return __builtin_lsx_vsllwil_h_b(_1, 1); } + // CHECK-LABEL: @vsllwil_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsllwil.w.h(<8 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsllwil.w.h(<8 x i16> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x i32> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4i32 vsllwil_w_h(v8i16 _1) { return __builtin_lsx_vsllwil_w_h(_1, 1); } + // CHECK-LABEL: @vsllwil_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsllwil.d.w(<4 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsllwil.d.w(<4 x i32> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vsllwil_d_w(v4i32 _1) { return __builtin_lsx_vsllwil_d_w(_1, 1); } + // CHECK-LABEL: @vsllwil_hu_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsllwil.hu.bu(<16 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsllwil.hu.bu(<16 x i8> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <8 x i16> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v8u16 vsllwil_hu_bu(v16u8 _1) { + return 
__builtin_lsx_vsllwil_hu_bu(_1, 1); + } + // CHECK-LABEL: @vsllwil_wu_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsllwil.wu.hu(<8 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsllwil.wu.hu(<8 x i16> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x i32> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4u32 vsllwil_wu_hu(v8u16 _1) { + return __builtin_lsx_vsllwil_wu_hu(_1, 1); + } + // CHECK-LABEL: @vsllwil_du_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsllwil.du.wu(<4 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsllwil.du.wu(<4 x i32> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2u64 vsllwil_du_wu(v4u32 _1) { + return __builtin_lsx_vsllwil_du_wu(_1, 1); + } + // CHECK-LABEL: @vsran_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsran.b.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsran.b.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vsran_b_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vsran_b_h(_1, _2); + } + // CHECK-LABEL: @vsran_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsran.h.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsran.h.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vsran_h_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vsran_h_w(_1, _2); + } + // CHECK-LABEL: @vsran_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsran.w.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsran.w.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vsran_w_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vsran_w_d(_1, _2); + } + // CHECK-LABEL: @vssran_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssran.b.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssran.b.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// 
CHECK-NEXT: ret i128 TMP3 + // + v16i8 vssran_b_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vssran_b_h(_1, _2); + } + // CHECK-LABEL: @vssran_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssran.h.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssran.h.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vssran_h_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vssran_h_w(_1, _2); + } + // CHECK-LABEL: @vssran_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssran.w.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssran.w.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vssran_w_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vssran_w_d(_1, _2); + } + // CHECK-LABEL: @vssran_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssran.bu.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssran.bu.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16u8 vssran_bu_h(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vssran_bu_h(_1, _2); + } + // CHECK-LABEL: @vssran_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssran.hu.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssran.hu.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8u16 vssran_hu_w(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vssran_hu_w(_1, _2); + } + // CHECK-LABEL: @vssran_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssran.wu.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssran.wu.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4u32 vssran_wu_d(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vssran_wu_d(_1, _2); + } + // CHECK-LABEL: @vsrarn_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsrarn.b.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = 
bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsrarn.b.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vsrarn_b_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vsrarn_b_h(_1, _2); + } + // CHECK-LABEL: @vsrarn_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsrarn.h.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsrarn.h.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vsrarn_h_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vsrarn_h_w(_1, _2); + } + // CHECK-LABEL: @vsrarn_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsrarn.w.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsrarn.w.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vsrarn_w_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vsrarn_w_d(_1, _2); + } + // CHECK-LABEL: @vssrarn_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssrarn.b.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssrarn.b.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vssrarn_b_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vssrarn_b_h(_1, _2); + } + // CHECK-LABEL: @vssrarn_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssrarn.h.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssrarn.h.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vssrarn_h_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vssrarn_h_w(_1, _2); + } + // CHECK-LABEL: @vssrarn_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssrarn.w.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssrarn.w.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vssrarn_w_d(v2i64 _1, v2i64 _2) { + 
return __builtin_lsx_vssrarn_w_d(_1, _2); + } + // CHECK-LABEL: @vssrarn_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssrarn.bu.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssrarn.bu.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16u8 vssrarn_bu_h(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vssrarn_bu_h(_1, _2); + } + // CHECK-LABEL: @vssrarn_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssrarn.hu.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssrarn.hu.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8u16 vssrarn_hu_w(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vssrarn_hu_w(_1, _2); + } + // CHECK-LABEL: @vssrarn_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssrarn.wu.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssrarn.wu.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4u32 vssrarn_wu_d(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vssrarn_wu_d(_1, _2); + } + // CHECK-LABEL: @vsrln_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsrln.b.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsrln.b.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vsrln_b_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vsrln_b_h(_1, _2); + } + // CHECK-LABEL: @vsrln_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsrln.h.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsrln.h.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vsrln_h_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vsrln_h_w(_1, _2); + } + // CHECK-LABEL: @vsrln_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsrln.w.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast 
i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsrln.w.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vsrln_w_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vsrln_w_d(_1, _2); + } + // CHECK-LABEL: @vssrln_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssrln.bu.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssrln.bu.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16u8 vssrln_bu_h(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vssrln_bu_h(_1, _2); + } + // CHECK-LABEL: @vssrln_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssrln.hu.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssrln.hu.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8u16 vssrln_hu_w(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vssrln_hu_w(_1, _2); + } + // CHECK-LABEL: @vssrln_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssrln.wu.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssrln.wu.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4u32 vssrln_wu_d(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vssrln_wu_d(_1, _2); + } + // CHECK-LABEL: @vsrlrn_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsrlrn.b.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsrlrn.b.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vsrlrn_b_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vsrlrn_b_h(_1, _2); + } + // CHECK-LABEL: @vsrlrn_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsrlrn.h.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsrlrn.h.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vsrlrn_h_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vsrlrn_h_w(_1, _2); + } + // CHECK-LABEL: @vsrlrn_w_d( + // 
CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsrlrn.w.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsrlrn.w.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vsrlrn_w_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vsrlrn_w_d(_1, _2); + } + // CHECK-LABEL: @vssrlrn_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrn.bu.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrn.bu.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16u8 vssrlrn_bu_h(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vssrlrn_bu_h(_1, _2); + } + // CHECK-LABEL: @vssrlrn_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrn.hu.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrn.hu.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8u16 vssrlrn_hu_w(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vssrlrn_hu_w(_1, _2); + } + // CHECK-LABEL: @vssrlrn_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrn.wu.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrn.wu.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4u32 vssrlrn_wu_d(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vssrlrn_wu_d(_1, _2); + } + // CHECK-LABEL: @vfrstpi_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vfrstpi.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vfrstpi.b(<16 x i8> TMP0, <16 x i8> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vfrstpi_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vfrstpi_b(_1, _2, 1); + } + // CHECK-LABEL: @vfrstpi_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vfrstpi.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: 
TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vfrstpi.h(<8 x i16> TMP0, <8 x i16> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vfrstpi_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vfrstpi_h(_1, _2, 1); + } + // CHECK-LABEL: @vfrstp_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vfrstp.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*, <16 x i8> _3:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP3:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vfrstp.b(<16 x i8> TMP0, <16 x i8> TMP1, <16 x i8> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <16 x i8> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v16i8 vfrstp_b(v16i8 _1, v16i8 _2, v16i8 _3) { + return __builtin_lsx_vfrstp_b(_1, _2, _3); + } + // CHECK-LABEL: @vfrstp_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vfrstp.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*, <8 x i16> _3:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP3:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vfrstp.h(<8 x i16> TMP0, <8 x i16> TMP1, <8 x i16> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <8 x i16> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v8i16 vfrstp_h(v8i16 _1, v8i16 _2, v8i16 _3) { + return __builtin_lsx_vfrstp_h(_1, _2, _3); + } + // CHECK-LABEL: @vshuf4i_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vshuf4i.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*, i32 1) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vshuf4i.d(<2 x i64> TMP0, <2 x i64> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vshuf4i_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vshuf4i_d(_1, _2, 1); + } + // CHECK-LABEL: @vbsrl_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vbsrl.v(<16 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vbsrl.v(<16 x i8> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <16 x i8> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v16i8 vbsrl_v(v16i8 _1) { return __builtin_lsx_vbsrl_v(_1, 1); } + // CHECK-LABEL: @vbsll_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vbsll.v(<16 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vbsll.v(<16 x i8> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <16 x i8> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v16i8 vbsll_v(v16i8 _1) { return __builtin_lsx_vbsll_v(_1, 1); } + // CHECK-LABEL: @vextrins_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> 
@llvm.loongarch.lsx.vextrins.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vextrins.b(<16 x i8> TMP0, <16 x i8> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vextrins_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vextrins_b(_1, _2, 1); + } + // CHECK-LABEL: @vextrins_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vextrins.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vextrins.h(<8 x i16> TMP0, <8 x i16> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vextrins_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vextrins_h(_1, _2, 1); + } + // CHECK-LABEL: @vextrins_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vextrins.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vextrins.w(<4 x i32> TMP0, <4 x i32> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vextrins_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vextrins_w(_1, _2, 1); + } + // CHECK-LABEL: @vextrins_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vextrins.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*, i32 1) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vextrins.d(<2 x i64> TMP0, <2 x i64> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vextrins_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vextrins_d(_1, _2, 1); + } + // CHECK-LABEL: @vmskltz_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmskltz.b(<16 x i8> _1:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmskltz.b(<16 x i8> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <16 x i8> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v16i8 vmskltz_b(v16i8 _1) { return __builtin_lsx_vmskltz_b(_1); } + // CHECK-LABEL: @vmskltz_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmskltz.h(<8 x i16> _1:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmskltz.h(<8 x i16> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <8 x i16> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v8i16 vmskltz_h(v8i16 _1) { return __builtin_lsx_vmskltz_h(_1); } + // CHECK-LABEL: 
@vmskltz_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmskltz.w(<4 x i32> _1:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmskltz.w(<4 x i32> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x i32> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4i32 vmskltz_w(v4i32 _1) { return __builtin_lsx_vmskltz_w(_1); } + // CHECK-LABEL: @vmskltz_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmskltz.d(<2 x i64> _1:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmskltz.d(<2 x i64> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vmskltz_d(v2i64 _1) { return __builtin_lsx_vmskltz_d(_1); } + // CHECK-LABEL: @vsigncov_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsigncov.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsigncov.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vsigncov_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vsigncov_b(_1, _2); + } + // CHECK-LABEL: @vsigncov_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsigncov.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsigncov.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vsigncov_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vsigncov_h(_1, _2); + } + // CHECK-LABEL: @vsigncov_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsigncov.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsigncov.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vsigncov_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vsigncov_w(_1, _2); + } + // CHECK-LABEL: @vsigncov_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsigncov.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsigncov.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vsigncov_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vsigncov_d(_1, _2); + } + // 
CHECK-LABEL: @vfmadd_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfmadd.s(<4 x float> _1:%.*, <4 x float> _2:%.*, <4 x float> _3:%.*) +-// CHECK-NEXT: ret <4 x float> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP3:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfmadd.s(<4 x float> TMP0, <4 x float> TMP1, <4 x float> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <4 x float> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v4f32 vfmadd_s(v4f32 _1, v4f32 _2, v4f32 _3) { + return __builtin_lsx_vfmadd_s(_1, _2, _3); + } + // CHECK-LABEL: @vfmadd_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfmadd.d(<2 x double> _1:%.*, <2 x double> _2:%.*, <2 x double> _3:%.*) +-// CHECK-NEXT: ret <2 x double> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP3:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfmadd.d(<2 x double> TMP0, <2 x double> TMP1, <2 x double> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <2 x double> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v2f64 vfmadd_d(v2f64 _1, v2f64 _2, v2f64 _3) { + return __builtin_lsx_vfmadd_d(_1, _2, _3); + } + // CHECK-LABEL: @vfmsub_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfmsub.s(<4 x float> _1:%.*, <4 x float> _2:%.*, <4 x float> _3:%.*) +-// CHECK-NEXT: ret <4 x float> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP3:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfmsub.s(<4 x float> TMP0, <4 x float> TMP1, <4 x float> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <4 x float> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v4f32 vfmsub_s(v4f32 _1, v4f32 _2, v4f32 _3) { + return __builtin_lsx_vfmsub_s(_1, _2, _3); + } + // CHECK-LABEL: @vfmsub_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfmsub.d(<2 x double> _1:%.*, <2 x double> _2:%.*, <2 x double> _3:%.*) +-// CHECK-NEXT: ret <2 x double> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP3:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfmsub.d(<2 x double> TMP0, <2 x double> TMP1, <2 x double> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <2 x double> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v2f64 vfmsub_d(v2f64 _1, v2f64 _2, v2f64 _3) { + return __builtin_lsx_vfmsub_d(_1, _2, _3); + } + // CHECK-LABEL: @vfnmadd_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfnmadd.s(<4 x float> _1:%.*, <4 x float> _2:%.*, <4 x float> _3:%.*) +-// CHECK-NEXT: ret <4 x float> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <4 x float> ++// CHECK-NEXT: 
TMP3:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfnmadd.s(<4 x float> TMP0, <4 x float> TMP1, <4 x float> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <4 x float> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v4f32 vfnmadd_s(v4f32 _1, v4f32 _2, v4f32 _3) { + return __builtin_lsx_vfnmadd_s(_1, _2, _3); + } + // CHECK-LABEL: @vfnmadd_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfnmadd.d(<2 x double> _1:%.*, <2 x double> _2:%.*, <2 x double> _3:%.*) +-// CHECK-NEXT: ret <2 x double> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP3:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfnmadd.d(<2 x double> TMP0, <2 x double> TMP1, <2 x double> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <2 x double> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v2f64 vfnmadd_d(v2f64 _1, v2f64 _2, v2f64 _3) { + return __builtin_lsx_vfnmadd_d(_1, _2, _3); + } + // CHECK-LABEL: @vfnmsub_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfnmsub.s(<4 x float> _1:%.*, <4 x float> _2:%.*, <4 x float> _3:%.*) +-// CHECK-NEXT: ret <4 x float> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP3:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfnmsub.s(<4 x float> TMP0, <4 x float> TMP1, <4 x float> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <4 x float> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v4f32 vfnmsub_s(v4f32 _1, v4f32 _2, v4f32 _3) { + return __builtin_lsx_vfnmsub_s(_1, _2, _3); + } + // CHECK-LABEL: @vfnmsub_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfnmsub.d(<2 x double> _1:%.*, <2 x double> _2:%.*, <2 x double> _3:%.*) +-// CHECK-NEXT: ret <2 x double> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP3:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfnmsub.d(<2 x double> TMP0, <2 x double> TMP1, <2 x double> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <2 x double> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v2f64 vfnmsub_d(v2f64 _1, v2f64 _2, v2f64 _3) { + return __builtin_lsx_vfnmsub_d(_1, _2, _3); + } + // CHECK-LABEL: @vftintrne_w_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.s(<4 x float> _1:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.s(<4 x float> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x i32> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4i32 vftintrne_w_s(v4f32 _1) { return __builtin_lsx_vftintrne_w_s(_1); } + // CHECK-LABEL: @vftintrne_l_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftintrne.l.d(<2 x double> _1:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftintrne.l.d(<2 x 
double> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vftintrne_l_d(v2f64 _1) { return __builtin_lsx_vftintrne_l_d(_1); } + // CHECK-LABEL: @vftintrp_w_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.s(<4 x float> _1:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.s(<4 x float> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x i32> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4i32 vftintrp_w_s(v4f32 _1) { return __builtin_lsx_vftintrp_w_s(_1); } + // CHECK-LABEL: @vftintrp_l_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftintrp.l.d(<2 x double> _1:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftintrp.l.d(<2 x double> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vftintrp_l_d(v2f64 _1) { return __builtin_lsx_vftintrp_l_d(_1); } + // CHECK-LABEL: @vftintrm_w_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.s(<4 x float> _1:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.s(<4 x float> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x i32> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4i32 vftintrm_w_s(v4f32 _1) { return __builtin_lsx_vftintrm_w_s(_1); } + // CHECK-LABEL: @vftintrm_l_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftintrm.l.d(<2 x double> _1:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftintrm.l.d(<2 x double> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vftintrm_l_d(v2f64 _1) { return __builtin_lsx_vftintrm_l_d(_1); } + // CHECK-LABEL: @vftint_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vftint.w.d(<2 x double> _1:%.*, <2 x double> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vftint.w.d(<2 x double> TMP0, <2 x double> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vftint_w_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vftint_w_d(_1, _2); + } + // CHECK-LABEL: @vffint_s_l( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.l(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x float> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x float> @llvm.loongarch.lsx.vffint.s.l(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x float> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4f32 vffint_s_l(v2i64 
_1, v2i64 _2) { + return __builtin_lsx_vffint_s_l(_1, _2); + } + // CHECK-LABEL: @vftintrz_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.d(<2 x double> _1:%.*, <2 x double> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vftintrz.w.d(<2 x double> TMP0, <2 x double> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vftintrz_w_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vftintrz_w_d(_1, _2); + } + // CHECK-LABEL: @vftintrp_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.d(<2 x double> _1:%.*, <2 x double> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vftintrp.w.d(<2 x double> TMP0, <2 x double> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vftintrp_w_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vftintrp_w_d(_1, _2); + } + // CHECK-LABEL: @vftintrm_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.d(<2 x double> _1:%.*, <2 x double> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vftintrm.w.d(<2 x double> TMP0, <2 x double> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vftintrm_w_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vftintrm_w_d(_1, _2); + } + // CHECK-LABEL: @vftintrne_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.d(<2 x double> _1:%.*, <2 x double> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vftintrne.w.d(<2 x double> TMP0, <2 x double> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vftintrne_w_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vftintrne_w_d(_1, _2); + } + // CHECK-LABEL: @vftintl_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftintl.l.s(<4 x float> _1:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftintl.l.s(<4 x float> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vftintl_l_s(v4f32 _1) { return __builtin_lsx_vftintl_l_s(_1); } + // CHECK-LABEL: @vftinth_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftinth.l.s(<4 x float> _1:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = tail call <2 
x i64> @llvm.loongarch.lsx.vftinth.l.s(<4 x float> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vftinth_l_s(v4f32 _1) { return __builtin_lsx_vftinth_l_s(_1); } + // CHECK-LABEL: @vffinth_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vffinth.d.w(<4 x i32> _1:%.*) +-// CHECK-NEXT: ret <2 x double> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x double> @llvm.loongarch.lsx.vffinth.d.w(<4 x i32> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x double> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2f64 vffinth_d_w(v4i32 _1) { return __builtin_lsx_vffinth_d_w(_1); } + // CHECK-LABEL: @vffintl_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vffintl.d.w(<4 x i32> _1:%.*) +-// CHECK-NEXT: ret <2 x double> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x double> @llvm.loongarch.lsx.vffintl.d.w(<4 x i32> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x double> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2f64 vffintl_d_w(v4i32 _1) { return __builtin_lsx_vffintl_d_w(_1); } + // CHECK-LABEL: @vftintrzl_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftintrzl.l.s(<4 x float> _1:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftintrzl.l.s(<4 x float> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vftintrzl_l_s(v4f32 _1) { return __builtin_lsx_vftintrzl_l_s(_1); } + // CHECK-LABEL: @vftintrzh_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftintrzh.l.s(<4 x float> _1:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftintrzh.l.s(<4 x float> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vftintrzh_l_s(v4f32 _1) { return __builtin_lsx_vftintrzh_l_s(_1); } + // CHECK-LABEL: @vftintrpl_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftintrpl.l.s(<4 x float> _1:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftintrpl.l.s(<4 x float> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vftintrpl_l_s(v4f32 _1) { return __builtin_lsx_vftintrpl_l_s(_1); } + // CHECK-LABEL: @vftintrph_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftintrph.l.s(<4 x float> _1:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftintrph.l.s(<4 x float> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vftintrph_l_s(v4f32 _1) { return __builtin_lsx_vftintrph_l_s(_1); } + // CHECK-LABEL: @vftintrml_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> 
@llvm.loongarch.lsx.vftintrml.l.s(<4 x float> _1:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftintrml.l.s(<4 x float> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vftintrml_l_s(v4f32 _1) { return __builtin_lsx_vftintrml_l_s(_1); } + // CHECK-LABEL: @vftintrmh_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftintrmh.l.s(<4 x float> _1:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftintrmh.l.s(<4 x float> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vftintrmh_l_s(v4f32 _1) { return __builtin_lsx_vftintrmh_l_s(_1); } + // CHECK-LABEL: @vftintrnel_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftintrnel.l.s(<4 x float> _1:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftintrnel.l.s(<4 x float> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vftintrnel_l_s(v4f32 _1) { + return __builtin_lsx_vftintrnel_l_s(_1); + } + // CHECK-LABEL: @vftintrneh_l_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftintrneh.l.s(<4 x float> _1:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vftintrneh.l.s(<4 x float> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vftintrneh_l_s(v4f32 _1) { + return __builtin_lsx_vftintrneh_l_s(_1); + } + // CHECK-LABEL: @vfrintrne_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfrintrne.s(<4 x float> _1:%.*) +-// CHECK-NEXT: TMP1:%.* = bitcast <4 x float> TMP0 to <4 x i32> +-// CHECK-NEXT: ret <4 x i32> TMP1 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfrintrne.s(<4 x float> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x float> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4i32 vfrintrne_s(v4f32 _1) { return __builtin_lsx_vfrintrne_s(_1); } + // CHECK-LABEL: @vfrintrne_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfrintrne.d(<2 x double> _1:%.*) +-// CHECK-NEXT: TMP1:%.* = bitcast <2 x double> TMP0 to <2 x i64> +-// CHECK-NEXT: ret <2 x i64> TMP1 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfrintrne.d(<2 x double> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x double> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vfrintrne_d(v2f64 _1) { return __builtin_lsx_vfrintrne_d(_1); } + // CHECK-LABEL: @vfrintrz_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfrintrz.s(<4 x float> _1:%.*) +-// CHECK-NEXT: TMP1:%.* = bitcast <4 x float> TMP0 to <4 x i32> +-// CHECK-NEXT: ret <4 x i32> TMP1 ++// CHECK-NEXT: TMP0:%.* = 
bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfrintrz.s(<4 x float> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x float> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4i32 vfrintrz_s(v4f32 _1) { return __builtin_lsx_vfrintrz_s(_1); } + // CHECK-LABEL: @vfrintrz_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfrintrz.d(<2 x double> _1:%.*) +-// CHECK-NEXT: TMP1:%.* = bitcast <2 x double> TMP0 to <2 x i64> +-// CHECK-NEXT: ret <2 x i64> TMP1 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfrintrz.d(<2 x double> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x double> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vfrintrz_d(v2f64 _1) { return __builtin_lsx_vfrintrz_d(_1); } + // CHECK-LABEL: @vfrintrp_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfrintrp.s(<4 x float> _1:%.*) +-// CHECK-NEXT: TMP1:%.* = bitcast <4 x float> TMP0 to <4 x i32> +-// CHECK-NEXT: ret <4 x i32> TMP1 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfrintrp.s(<4 x float> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x float> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4i32 vfrintrp_s(v4f32 _1) { return __builtin_lsx_vfrintrp_s(_1); } + // CHECK-LABEL: @vfrintrp_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfrintrp.d(<2 x double> _1:%.*) +-// CHECK-NEXT: TMP1:%.* = bitcast <2 x double> TMP0 to <2 x i64> +-// CHECK-NEXT: ret <2 x i64> TMP1 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfrintrp.d(<2 x double> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x double> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vfrintrp_d(v2f64 _1) { return __builtin_lsx_vfrintrp_d(_1); } + // CHECK-LABEL: @vfrintrm_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfrintrm.s(<4 x float> _1:%.*) +-// CHECK-NEXT: TMP1:%.* = bitcast <4 x float> TMP0 to <4 x i32> +-// CHECK-NEXT: ret <4 x i32> TMP1 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x float> @llvm.loongarch.lsx.vfrintrm.s(<4 x float> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x float> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4i32 vfrintrm_s(v4f32 _1) { return __builtin_lsx_vfrintrm_s(_1); } + // CHECK-LABEL: @vfrintrm_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfrintrm.d(<2 x double> _1:%.*) +-// CHECK-NEXT: TMP1:%.* = bitcast <2 x double> TMP0 to <2 x i64> +-// CHECK-NEXT: ret <2 x i64> TMP1 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x double> @llvm.loongarch.lsx.vfrintrm.d(<2 x double> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x double> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vfrintrm_d(v2f64 _1) { return __builtin_lsx_vfrintrm_d(_1); } + // CHECK-LABEL: @vstelm_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.b(<16 x i8> _1:%.*, ptr _2:%.*, i32 1, i32 1) ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: tail call 
void @llvm.loongarch.lsx.vstelm.b(<16 x i8> TMP0, ptr _2:%.*, i32 1, i32 1) + // CHECK-NEXT: ret void + // + void vstelm_b(v16i8 _1, void *_2) { +@@ -3349,7 +4577,8 @@ void vstelm_b(v16i8 _1, void *_2) { + } + // CHECK-LABEL: @vstelm_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.h(<8 x i16> _1:%.*, ptr _2:%.*, i32 2, i32 1) ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.h(<8 x i16> TMP0, ptr _2:%.*, i32 2, i32 1) + // CHECK-NEXT: ret void + // + void vstelm_h(v8i16 _1, void *_2) { +@@ -3357,7 +4586,8 @@ void vstelm_h(v8i16 _1, void *_2) { + } + // CHECK-LABEL: @vstelm_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.w(<4 x i32> _1:%.*, ptr _2:%.*, i32 4, i32 1) ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.w(<4 x i32> TMP0, ptr _2:%.*, i32 4, i32 1) + // CHECK-NEXT: ret void + // + void vstelm_w(v4i32 _1, void *_2) { +@@ -3365,7 +4595,8 @@ void vstelm_w(v4i32 _1, void *_2) { + } + // CHECK-LABEL: @vstelm_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.d(<2 x i64> _1:%.*, ptr _2:%.*, i32 8, i32 1) ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstelm.d(<2 x i64> TMP0, ptr _2:%.*, i32 8, i32 1) + // CHECK-NEXT: ret void + // + void vstelm_d(v2i64 _1, void *_2) { +@@ -3373,1286 +4604,1785 @@ void vstelm_d(v2i64 _1, void *_2) { + } + // CHECK-LABEL: @vaddwev_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vaddwev_d_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vaddwev_d_w(_1, _2); + } + // CHECK-LABEL: @vaddwev_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vaddwev_w_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vaddwev_w_h(_1, _2); + } + // CHECK-LABEL: @vaddwev_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vaddwev_h_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vaddwev_h_b(_1, _2); + } + // CHECK-LABEL: @vaddwod_d_w( + 
// CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vaddwod_d_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vaddwod_d_w(_1, _2); + } + // CHECK-LABEL: @vaddwod_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vaddwod_w_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vaddwod_w_h(_1, _2); + } + // CHECK-LABEL: @vaddwod_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vaddwod_h_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vaddwod_h_b(_1, _2); + } + // CHECK-LABEL: @vaddwev_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vaddwev_d_wu(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vaddwev_d_wu(_1, _2); + } + // CHECK-LABEL: @vaddwev_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vaddwev_w_hu(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vaddwev_w_hu(_1, _2); + } + // CHECK-LABEL: @vaddwev_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = 
tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vaddwev_h_bu(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vaddwev_h_bu(_1, _2); + } + // CHECK-LABEL: @vaddwod_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vaddwod_d_wu(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vaddwod_d_wu(_1, _2); + } + // CHECK-LABEL: @vaddwod_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vaddwod_w_hu(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vaddwod_w_hu(_1, _2); + } + // CHECK-LABEL: @vaddwod_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vaddwod_h_bu(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vaddwod_h_bu(_1, _2); + } + // CHECK-LABEL: @vaddwev_d_wu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.d.wu.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vaddwev_d_wu_w(v4u32 _1, v4i32 _2) { + return __builtin_lsx_vaddwev_d_wu_w(_1, _2); + } + // CHECK-LABEL: @vaddwev_w_hu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vaddwev.w.hu.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vaddwev_w_hu_h(v8u16 _1, v8i16 _2) { + return __builtin_lsx_vaddwev_w_hu_h(_1, _2); + } + // CHECK-LABEL: 
@vaddwev_h_bu_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vaddwev.h.bu.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vaddwev_h_bu_b(v16u8 _1, v16i8 _2) { + return __builtin_lsx_vaddwev_h_bu_b(_1, _2); + } + // CHECK-LABEL: @vaddwod_d_wu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.d.wu.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vaddwod_d_wu_w(v4u32 _1, v4i32 _2) { + return __builtin_lsx_vaddwod_d_wu_w(_1, _2); + } + // CHECK-LABEL: @vaddwod_w_hu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vaddwod.w.hu.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vaddwod_w_hu_h(v8u16 _1, v8i16 _2) { + return __builtin_lsx_vaddwod_w_hu_h(_1, _2); + } + // CHECK-LABEL: @vaddwod_h_bu_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vaddwod.h.bu.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vaddwod_h_bu_b(v16u8 _1, v16i8 _2) { + return __builtin_lsx_vaddwod_h_bu_b(_1, _2); + } + // CHECK-LABEL: @vsubwev_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vsubwev_d_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vsubwev_d_w(_1, _2); + } + // CHECK-LABEL: @vsubwev_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = 
bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vsubwev_w_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vsubwev_w_h(_1, _2); + } + // CHECK-LABEL: @vsubwev_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vsubwev_h_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vsubwev_h_b(_1, _2); + } + // CHECK-LABEL: @vsubwod_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vsubwod_d_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vsubwod_d_w(_1, _2); + } + // CHECK-LABEL: @vsubwod_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vsubwod_w_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vsubwod_w_h(_1, _2); + } + // CHECK-LABEL: @vsubwod_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vsubwod_h_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vsubwod_h_b(_1, _2); + } + // CHECK-LABEL: @vsubwev_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.wu(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.d.wu(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vsubwev_d_wu(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vsubwev_d_wu(_1, _2); + } + // 
CHECK-LABEL: @vsubwev_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.hu(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsubwev.w.hu(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vsubwev_w_hu(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vsubwev_w_hu(_1, _2); + } + // CHECK-LABEL: @vsubwev_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.bu(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsubwev.h.bu(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vsubwev_h_bu(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vsubwev_h_bu(_1, _2); + } + // CHECK-LABEL: @vsubwod_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.wu(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.d.wu(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vsubwod_d_wu(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vsubwod_d_wu(_1, _2); + } + // CHECK-LABEL: @vsubwod_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.hu(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsubwod.w.hu(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vsubwod_w_hu(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vsubwod_w_hu(_1, _2); + } + // CHECK-LABEL: @vsubwod_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.bu(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsubwod.h.bu(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vsubwod_h_bu(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vsubwod_h_bu(_1, _2); + } + // CHECK-LABEL: @vaddwev_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 
_2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vaddwev_q_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vaddwev_q_d(_1, _2); + } + // CHECK-LABEL: @vaddwod_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vaddwod_q_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vaddwod_q_d(_1, _2); + } + // CHECK-LABEL: @vaddwev_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vaddwev_q_du(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vaddwev_q_du(_1, _2); + } + // CHECK-LABEL: @vaddwod_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vaddwod_q_du(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vaddwod_q_du(_1, _2); + } + // CHECK-LABEL: @vsubwev_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vsubwev_q_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vsubwev_q_d(_1, _2); + } + // CHECK-LABEL: @vsubwod_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vsubwod_q_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vsubwod_q_d(_1, _2); + } + // 
CHECK-LABEL: @vsubwev_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsubwev.q.du(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vsubwev_q_du(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vsubwev_q_du(_1, _2); + } + // CHECK-LABEL: @vsubwod_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsubwod.q.du(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vsubwod_q_du(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vsubwod_q_du(_1, _2); + } + // CHECK-LABEL: @vaddwev_q_du_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vaddwev.q.du.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vaddwev_q_du_d(v2u64 _1, v2i64 _2) { + return __builtin_lsx_vaddwev_q_du_d(_1, _2); + } + // CHECK-LABEL: @vaddwod_q_du_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vaddwod.q.du.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vaddwod_q_du_d(v2u64 _1, v2i64 _2) { + return __builtin_lsx_vaddwod_q_du_d(_1, _2); + } + // CHECK-LABEL: @vmulwev_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vmulwev_d_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vmulwev_d_w(_1, _2); + } + // CHECK-LABEL: @vmulwev_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast 
i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vmulwev_w_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vmulwev_w_h(_1, _2); + } + // CHECK-LABEL: @vmulwev_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vmulwev_h_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vmulwev_h_b(_1, _2); + } + // CHECK-LABEL: @vmulwod_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vmulwod_d_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vmulwod_d_w(_1, _2); + } + // CHECK-LABEL: @vmulwod_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vmulwod_w_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vmulwod_w_h(_1, _2); + } + // CHECK-LABEL: @vmulwod_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vmulwod_h_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vmulwod_h_b(_1, _2); + } + // CHECK-LABEL: @vmulwev_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vmulwev_d_wu(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vmulwev_d_wu(_1, _2); + } + // 
CHECK-LABEL: @vmulwev_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vmulwev_w_hu(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vmulwev_w_hu(_1, _2); + } + // CHECK-LABEL: @vmulwev_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vmulwev_h_bu(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vmulwev_h_bu(_1, _2); + } + // CHECK-LABEL: @vmulwod_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vmulwod_d_wu(v4u32 _1, v4u32 _2) { + return __builtin_lsx_vmulwod_d_wu(_1, _2); + } + // CHECK-LABEL: @vmulwod_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vmulwod_w_hu(v8u16 _1, v8u16 _2) { + return __builtin_lsx_vmulwod_w_hu(_1, _2); + } + // CHECK-LABEL: @vmulwod_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vmulwod_h_bu(v16u8 _1, v16u8 _2) { + return __builtin_lsx_vmulwod_h_bu(_1, _2); + } + // CHECK-LABEL: @vmulwev_d_wu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 
_2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.d.wu.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vmulwev_d_wu_w(v4u32 _1, v4i32 _2) { + return __builtin_lsx_vmulwev_d_wu_w(_1, _2); + } + // CHECK-LABEL: @vmulwev_w_hu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmulwev.w.hu.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vmulwev_w_hu_h(v8u16 _1, v8i16 _2) { + return __builtin_lsx_vmulwev_w_hu_h(_1, _2); + } + // CHECK-LABEL: @vmulwev_h_bu_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmulwev.h.bu.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vmulwev_h_bu_b(v16u8 _1, v16i8 _2) { + return __builtin_lsx_vmulwev_h_bu_b(_1, _2); + } + // CHECK-LABEL: @vmulwod_d_wu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.d.wu.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vmulwod_d_wu_w(v4u32 _1, v4i32 _2) { + return __builtin_lsx_vmulwod_d_wu_w(_1, _2); + } + // CHECK-LABEL: @vmulwod_w_hu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmulwod.w.hu.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vmulwod_w_hu_h(v8u16 _1, v8i16 _2) { + return __builtin_lsx_vmulwod_w_hu_h(_1, _2); + } + // CHECK-LABEL: @vmulwod_h_bu_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmulwod.h.bu.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vmulwod_h_bu_b(v16u8 _1, 
v16i8 _2) { + return __builtin_lsx_vmulwod_h_bu_b(_1, _2); + } + // CHECK-LABEL: @vmulwev_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vmulwev_q_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vmulwev_q_d(_1, _2); + } + // CHECK-LABEL: @vmulwod_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vmulwod_q_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vmulwod_q_d(_1, _2); + } + // CHECK-LABEL: @vmulwev_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vmulwev_q_du(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vmulwev_q_du(_1, _2); + } + // CHECK-LABEL: @vmulwod_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vmulwod_q_du(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vmulwod_q_du(_1, _2); + } + // CHECK-LABEL: @vmulwev_q_du_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmulwev.q.du.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vmulwev_q_du_d(v2u64 _1, v2i64 _2) { + return __builtin_lsx_vmulwev_q_du_d(_1, _2); + } + // CHECK-LABEL: @vmulwod_q_du_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 
_1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmulwod.q.du.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vmulwod_q_du_d(v2u64 _1, v2i64 _2) { + return __builtin_lsx_vmulwod_q_du_d(_1, _2); + } + // CHECK-LABEL: @vhaddw_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.q.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.q.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vhaddw_q_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vhaddw_q_d(_1, _2); + } + // CHECK-LABEL: @vhaddw_qu_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.qu.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vhaddw.qu.du(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2u64 vhaddw_qu_du(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vhaddw_qu_du(_1, _2); + } + // CHECK-LABEL: @vhsubw_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.q.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.q.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vhsubw_q_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vhsubw_q_d(_1, _2); + } + // CHECK-LABEL: @vhsubw_qu_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.qu.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vhsubw.qu.du(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2u64 vhsubw_qu_du(v2u64 _1, v2u64 _2) { + return __builtin_lsx_vhsubw_qu_du(_1, _2); + } + // CHECK-LABEL: @vmaddwev_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.w(<2 x i64> _1:%.*, <4 x i32> _2:%.*, <4 x i32> _3:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP3:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.w(<2 x i64> TMP0, <4 x i32> TMP1, <4 x i32> TMP2) ++// CHECK-NEXT: TMP4:%.* = 
bitcast <2 x i64> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v2i64 vmaddwev_d_w(v2i64 _1, v4i32 _2, v4i32 _3) { + return __builtin_lsx_vmaddwev_d_w(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwev_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.h(<4 x i32> _1:%.*, <8 x i16> _2:%.*, <8 x i16> _3:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP3:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.h(<4 x i32> TMP0, <8 x i16> TMP1, <8 x i16> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <4 x i32> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v4i32 vmaddwev_w_h(v4i32 _1, v8i16 _2, v8i16 _3) { + return __builtin_lsx_vmaddwev_w_h(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwev_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.b(<8 x i16> _1:%.*, <16 x i8> _2:%.*, <16 x i8> _3:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP3:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.b(<8 x i16> TMP0, <16 x i8> TMP1, <16 x i8> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <8 x i16> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v8i16 vmaddwev_h_b(v8i16 _1, v16i8 _2, v16i8 _3) { + return __builtin_lsx_vmaddwev_h_b(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwev_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu(<2 x i64> _1:%.*, <4 x i32> _2:%.*, <4 x i32> _3:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP3:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu(<2 x i64> TMP0, <4 x i32> TMP1, <4 x i32> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <2 x i64> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v2u64 vmaddwev_d_wu(v2u64 _1, v4u32 _2, v4u32 _3) { + return __builtin_lsx_vmaddwev_d_wu(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwev_w_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu(<4 x i32> _1:%.*, <8 x i16> _2:%.*, <8 x i16> _3:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP3:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu(<4 x i32> TMP0, <8 x i16> TMP1, <8 x i16> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <4 x i32> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v4u32 vmaddwev_w_hu(v4u32 _1, v8u16 _2, v8u16 _3) { + return __builtin_lsx_vmaddwev_w_hu(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwev_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu(<8 x i16> _1:%.*, <16 x i8> _2:%.*, <16 x i8> _3:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* 
= bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP3:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu(<8 x i16> TMP0, <16 x i8> TMP1, <16 x i8> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <8 x i16> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v8u16 vmaddwev_h_bu(v8u16 _1, v16u8 _2, v16u8 _3) { + return __builtin_lsx_vmaddwev_h_bu(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwod_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.w(<2 x i64> _1:%.*, <4 x i32> _2:%.*, <4 x i32> _3:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP3:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.w(<2 x i64> TMP0, <4 x i32> TMP1, <4 x i32> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <2 x i64> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v2i64 vmaddwod_d_w(v2i64 _1, v4i32 _2, v4i32 _3) { + return __builtin_lsx_vmaddwod_d_w(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwod_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.h(<4 x i32> _1:%.*, <8 x i16> _2:%.*, <8 x i16> _3:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP3:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.h(<4 x i32> TMP0, <8 x i16> TMP1, <8 x i16> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <4 x i32> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v4i32 vmaddwod_w_h(v4i32 _1, v8i16 _2, v8i16 _3) { + return __builtin_lsx_vmaddwod_w_h(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwod_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.b(<8 x i16> _1:%.*, <16 x i8> _2:%.*, <16 x i8> _3:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP3:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.b(<8 x i16> TMP0, <16 x i8> TMP1, <16 x i8> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <8 x i16> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v8i16 vmaddwod_h_b(v8i16 _1, v16i8 _2, v16i8 _3) { + return __builtin_lsx_vmaddwod_h_b(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwod_d_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu(<2 x i64> _1:%.*, <4 x i32> _2:%.*, <4 x i32> _3:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP3:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu(<2 x i64> TMP0, <4 x i32> TMP1, <4 x i32> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <2 x i64> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v2u64 vmaddwod_d_wu(v2u64 _1, v4u32 _2, v4u32 _3) { + return __builtin_lsx_vmaddwod_d_wu(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwod_w_hu( + // CHECK-NEXT: entry: 
+-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu(<4 x i32> _1:%.*, <8 x i16> _2:%.*, <8 x i16> _3:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP3:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu(<4 x i32> TMP0, <8 x i16> TMP1, <8 x i16> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <4 x i32> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v4u32 vmaddwod_w_hu(v4u32 _1, v8u16 _2, v8u16 _3) { + return __builtin_lsx_vmaddwod_w_hu(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwod_h_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu(<8 x i16> _1:%.*, <16 x i8> _2:%.*, <16 x i8> _3:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP3:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu(<8 x i16> TMP0, <16 x i8> TMP1, <16 x i8> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <8 x i16> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v8u16 vmaddwod_h_bu(v8u16 _1, v16u8 _2, v16u8 _3) { + return __builtin_lsx_vmaddwod_h_bu(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwev_d_wu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu.w(<2 x i64> _1:%.*, <4 x i32> _2:%.*, <4 x i32> _3:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP3:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.d.wu.w(<2 x i64> TMP0, <4 x i32> TMP1, <4 x i32> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <2 x i64> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v2i64 vmaddwev_d_wu_w(v2i64 _1, v4u32 _2, v4i32 _3) { + return __builtin_lsx_vmaddwev_d_wu_w(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwev_w_hu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu.h(<4 x i32> _1:%.*, <8 x i16> _2:%.*, <8 x i16> _3:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP3:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwev.w.hu.h(<4 x i32> TMP0, <8 x i16> TMP1, <8 x i16> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <4 x i32> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v4i32 vmaddwev_w_hu_h(v4i32 _1, v8u16 _2, v8i16 _3) { + return __builtin_lsx_vmaddwev_w_hu_h(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwev_h_bu_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu.b(<8 x i16> _1:%.*, <16 x i8> _2:%.*, <16 x i8> _3:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP3:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwev.h.bu.b(<8 
x i16> TMP0, <16 x i8> TMP1, <16 x i8> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <8 x i16> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v8i16 vmaddwev_h_bu_b(v8i16 _1, v16u8 _2, v16i8 _3) { + return __builtin_lsx_vmaddwev_h_bu_b(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwod_d_wu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu.w(<2 x i64> _1:%.*, <4 x i32> _2:%.*, <4 x i32> _3:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP3:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.d.wu.w(<2 x i64> TMP0, <4 x i32> TMP1, <4 x i32> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <2 x i64> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v2i64 vmaddwod_d_wu_w(v2i64 _1, v4u32 _2, v4i32 _3) { + return __builtin_lsx_vmaddwod_d_wu_w(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwod_w_hu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu.h(<4 x i32> _1:%.*, <8 x i16> _2:%.*, <8 x i16> _3:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP3:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vmaddwod.w.hu.h(<4 x i32> TMP0, <8 x i16> TMP1, <8 x i16> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <4 x i32> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v4i32 vmaddwod_w_hu_h(v4i32 _1, v8u16 _2, v8i16 _3) { + return __builtin_lsx_vmaddwod_w_hu_h(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwod_h_bu_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu.b(<8 x i16> _1:%.*, <16 x i8> _2:%.*, <16 x i8> _3:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP3:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vmaddwod.h.bu.b(<8 x i16> TMP0, <16 x i8> TMP1, <16 x i8> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <8 x i16> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v8i16 vmaddwod_h_bu_b(v8i16 _1, v16u8 _2, v16i8 _3) { + return __builtin_lsx_vmaddwod_h_bu_b(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwev_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*, <2 x i64> _3:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP3:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.d(<2 x i64> TMP0, <2 x i64> TMP1, <2 x i64> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <2 x i64> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v2i64 vmaddwev_q_d(v2i64 _1, v2i64 _2, v2i64 _3) { + return __builtin_lsx_vmaddwev_q_d(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwod_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*, <2 x i64> _3:%.*) +-// CHECK-NEXT: 
ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP3:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.d(<2 x i64> TMP0, <2 x i64> TMP1, <2 x i64> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <2 x i64> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v2i64 vmaddwod_q_d(v2i64 _1, v2i64 _2, v2i64 _3) { + return __builtin_lsx_vmaddwod_q_d(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwev_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*, <2 x i64> _3:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP3:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du(<2 x i64> TMP0, <2 x i64> TMP1, <2 x i64> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <2 x i64> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v2u64 vmaddwev_q_du(v2u64 _1, v2u64 _2, v2u64 _3) { + return __builtin_lsx_vmaddwev_q_du(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwod_q_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du(<2 x i64> _1:%.*, <2 x i64> _2:%.*, <2 x i64> _3:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP3:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du(<2 x i64> TMP0, <2 x i64> TMP1, <2 x i64> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <2 x i64> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v2u64 vmaddwod_q_du(v2u64 _1, v2u64 _2, v2u64 _3) { + return __builtin_lsx_vmaddwod_q_du(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwev_q_du_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*, <2 x i64> _3:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP3:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwev.q.du.d(<2 x i64> TMP0, <2 x i64> TMP1, <2 x i64> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <2 x i64> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v2i64 vmaddwev_q_du_d(v2i64 _1, v2u64 _2, v2i64 _3) { + return __builtin_lsx_vmaddwev_q_du_d(_1, _2, _3); + } + // CHECK-LABEL: @vmaddwod_q_du_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*, <2 x i64> _3:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP3:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vmaddwod.q.du.d(<2 x i64> TMP0, <2 x i64> TMP1, <2 x i64> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <2 x i64> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v2i64 
vmaddwod_q_du_d(v2i64 _1, v2u64 _2, v2i64 _3) { + return __builtin_lsx_vmaddwod_q_du_d(_1, _2, _3); + } + // CHECK-LABEL: @vrotr_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vrotr.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vrotr.b(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vrotr_b(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vrotr_b(_1, _2); + } + // CHECK-LABEL: @vrotr_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vrotr.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vrotr.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vrotr_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vrotr_h(_1, _2); + } + // CHECK-LABEL: @vrotr_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vrotr.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vrotr.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vrotr_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vrotr_w(_1, _2); + } + // CHECK-LABEL: @vrotr_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vrotr.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vrotr.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vrotr_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vrotr_d(_1, _2); + } + // CHECK-LABEL: @vadd_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vadd.q(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vadd.q(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vadd_q(v2i64 _1, v2i64 _2) { return __builtin_lsx_vadd_q(_1, _2); } + // CHECK-LABEL: @vsub_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsub.q(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: 
TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsub.q(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vsub_q(v2i64 _1, v2i64 _2) { return __builtin_lsx_vsub_q(_1, _2); } + // CHECK-LABEL: @vldrepl_b( + // CHECK-NEXT: entry: + // CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vldrepl.b(ptr _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP1:%.* = bitcast <16 x i8> TMP0 to i128 ++// CHECK-NEXT: ret i128 TMP1 + // + v16i8 vldrepl_b(void *_1) { return __builtin_lsx_vldrepl_b(_1, 1); } + // CHECK-LABEL: @vldrepl_h( + // CHECK-NEXT: entry: + // CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vldrepl.h(ptr _1:%.*, i32 2) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP1:%.* = bitcast <8 x i16> TMP0 to i128 ++// CHECK-NEXT: ret i128 TMP1 + // + v8i16 vldrepl_h(void *_1) { return __builtin_lsx_vldrepl_h(_1, 2); } + // CHECK-LABEL: @vldrepl_w( + // CHECK-NEXT: entry: + // CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vldrepl.w(ptr _1:%.*, i32 4) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP1:%.* = bitcast <4 x i32> TMP0 to i128 ++// CHECK-NEXT: ret i128 TMP1 + // + v4i32 vldrepl_w(void *_1) { return __builtin_lsx_vldrepl_w(_1, 4); } + // CHECK-LABEL: @vldrepl_d( + // CHECK-NEXT: entry: + // CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vldrepl.d(ptr _1:%.*, i32 8) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP1:%.* = bitcast <2 x i64> TMP0 to i128 ++// CHECK-NEXT: ret i128 TMP1 + // + v2i64 vldrepl_d(void *_1) { return __builtin_lsx_vldrepl_d(_1, 8); } + // CHECK-LABEL: @vmskgez_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmskgez.b(<16 x i8> _1:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmskgez.b(<16 x i8> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <16 x i8> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v16i8 vmskgez_b(v16i8 _1) { return __builtin_lsx_vmskgez_b(_1); } + // CHECK-LABEL: @vmsknz_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmsknz.b(<16 x i8> _1:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vmsknz.b(<16 x i8> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <16 x i8> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v16i8 vmsknz_b(v16i8 _1) { return __builtin_lsx_vmsknz_b(_1); } + // CHECK-LABEL: @vexth_h_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vexth.h.b(<16 x i8> _1:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vexth.h.b(<16 x i8> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <8 x i16> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v8i16 vexth_h_b(v16i8 _1) { return __builtin_lsx_vexth_h_b(_1); } + // CHECK-LABEL: @vexth_w_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vexth.w.h(<8 x i16> _1:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vexth.w.h(<8 x i16> TMP0) ++// 
CHECK-NEXT: TMP2:%.* = bitcast <4 x i32> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4i32 vexth_w_h(v8i16 _1) { return __builtin_lsx_vexth_w_h(_1); } + // CHECK-LABEL: @vexth_d_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vexth.d.w(<4 x i32> _1:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vexth.d.w(<4 x i32> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vexth_d_w(v4i32 _1) { return __builtin_lsx_vexth_d_w(_1); } + // CHECK-LABEL: @vexth_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vexth.q.d(<2 x i64> _1:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vexth.q.d(<2 x i64> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vexth_q_d(v2i64 _1) { return __builtin_lsx_vexth_q_d(_1); } + // CHECK-LABEL: @vexth_hu_bu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vexth.hu.bu(<16 x i8> _1:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vexth.hu.bu(<16 x i8> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <8 x i16> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v8u16 vexth_hu_bu(v16u8 _1) { return __builtin_lsx_vexth_hu_bu(_1); } + // CHECK-LABEL: @vexth_wu_hu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vexth.wu.hu(<8 x i16> _1:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vexth.wu.hu(<8 x i16> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x i32> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4u32 vexth_wu_hu(v8u16 _1) { return __builtin_lsx_vexth_wu_hu(_1); } + // CHECK-LABEL: @vexth_du_wu( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vexth.du.wu(<4 x i32> _1:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vexth.du.wu(<4 x i32> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2u64 vexth_du_wu(v4u32 _1) { return __builtin_lsx_vexth_du_wu(_1); } + // CHECK-LABEL: @vexth_qu_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vexth.qu.du(<2 x i64> _1:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vexth.qu.du(<2 x i64> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2u64 vexth_qu_du(v2u64 _1) { return __builtin_lsx_vexth_qu_du(_1); } + // CHECK-LABEL: @vrotri_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vrotri.b(<16 x i8> _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = 
tail call <16 x i8> @llvm.loongarch.lsx.vrotri.b(<16 x i8> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <16 x i8> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v16i8 vrotri_b(v16i8 _1) { return __builtin_lsx_vrotri_b(_1, 1); } + // CHECK-LABEL: @vrotri_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vrotri.h(<8 x i16> _1:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vrotri.h(<8 x i16> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <8 x i16> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v8i16 vrotri_h(v8i16 _1) { return __builtin_lsx_vrotri_h(_1, 1); } + // CHECK-LABEL: @vrotri_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vrotri.w(<4 x i32> _1:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vrotri.w(<4 x i32> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <4 x i32> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v4i32 vrotri_w(v4i32 _1) { return __builtin_lsx_vrotri_w(_1, 1); } + // CHECK-LABEL: @vrotri_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vrotri.d(<2 x i64> _1:%.*, i32 1) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vrotri.d(<2 x i64> TMP0, i32 1) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vrotri_d(v2i64 _1) { return __builtin_lsx_vrotri_d(_1, 1); } + // CHECK-LABEL: @vextl_q_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vextl.q.d(<2 x i64> _1:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vextl.q.d(<2 x i64> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2i64 vextl_q_d(v2i64 _1) { return __builtin_lsx_vextl_q_d(_1); } + // CHECK-LABEL: @vsrlni_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h(<16 x i8> _1:%.*, <16 x i8> _2:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsrlni.b.h(<16 x i8> TMP0, <16 x i8> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vsrlni_b_h(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vsrlni_b_h(_1, _2, 1); + } + // CHECK-LABEL: @vsrlni_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsrlni.h.w(<8 x i16> _1:%.*, <8 x i16> _2:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsrlni.h.w(<8 x i16> TMP0, <8 x i16> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 
vsrlni_h_w(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vsrlni_h_w(_1, _2, 1); + } + // CHECK-LABEL: @vsrlni_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsrlni.w.d(<4 x i32> _1:%.*, <4 x i32> _2:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsrlni.w.d(<4 x i32> TMP0, <4 x i32> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vsrlni_w_d(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vsrlni_w_d(_1, _2, 1); + } + // CHECK-LABEL: @vsrlni_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsrlni.d.q(<2 x i64> _1:%.*, <2 x i64> _2:%.*, i32 1) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsrlni.d.q(<2 x i64> TMP0, <2 x i64> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vsrlni_d_q(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vsrlni_d_q(_1, _2, 1); + } + // CHECK-LABEL: @vsrlrni_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h(<16 x i8> _1:%.*, <16 x i8> _2:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsrlrni.b.h(<16 x i8> TMP0, <16 x i8> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vsrlrni_b_h(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vsrlrni_b_h(_1, _2, 1); + } + // CHECK-LABEL: @vsrlrni_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsrlrni.h.w(<8 x i16> _1:%.*, <8 x i16> _2:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsrlrni.h.w(<8 x i16> TMP0, <8 x i16> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vsrlrni_h_w(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vsrlrni_h_w(_1, _2, 1); + } + // CHECK-LABEL: @vsrlrni_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d(<4 x i32> _1:%.*, <4 x i32> _2:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsrlrni.w.d(<4 x i32> TMP0, <4 x i32> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vsrlrni_w_d(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vsrlrni_w_d(_1, _2, 1); + } + // CHECK-LABEL: @vsrlrni_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsrlrni.d.q(<2 x i64> _1:%.*, <2 x i64> _2:%.*, i32 1) +-// 
CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsrlrni.d.q(<2 x i64> TMP0, <2 x i64> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vsrlrni_d_q(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vsrlrni_d_q(_1, _2, 1); + } + // CHECK-LABEL: @vssrlni_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h(<16 x i8> _1:%.*, <16 x i8> _2:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssrlni.b.h(<16 x i8> TMP0, <16 x i8> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vssrlni_b_h(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vssrlni_b_h(_1, _2, 1); + } + // CHECK-LABEL: @vssrlni_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssrlni.h.w(<8 x i16> _1:%.*, <8 x i16> _2:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssrlni.h.w(<8 x i16> TMP0, <8 x i16> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vssrlni_h_w(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vssrlni_h_w(_1, _2, 1); + } + // CHECK-LABEL: @vssrlni_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssrlni.w.d(<4 x i32> _1:%.*, <4 x i32> _2:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssrlni.w.d(<4 x i32> TMP0, <4 x i32> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vssrlni_w_d(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vssrlni_w_d(_1, _2, 1); + } + // CHECK-LABEL: @vssrlni_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vssrlni.d.q(<2 x i64> _1:%.*, <2 x i64> _2:%.*, i32 1) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vssrlni.d.q(<2 x i64> TMP0, <2 x i64> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vssrlni_d_q(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vssrlni_d_q(_1, _2, 1); + } + // CHECK-LABEL: @vssrlni_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssrlni.bu.h(<16 x i8> _1:%.*, <16 x i8> _2:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssrlni.bu.h(<16 x i8> 
TMP0, <16 x i8> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16u8 vssrlni_bu_h(v16u8 _1, v16i8 _2) { + return __builtin_lsx_vssrlni_bu_h(_1, _2, 1); + } + // CHECK-LABEL: @vssrlni_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssrlni.hu.w(<8 x i16> _1:%.*, <8 x i16> _2:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssrlni.hu.w(<8 x i16> TMP0, <8 x i16> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8u16 vssrlni_hu_w(v8u16 _1, v8i16 _2) { + return __builtin_lsx_vssrlni_hu_w(_1, _2, 1); + } + // CHECK-LABEL: @vssrlni_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssrlni.wu.d(<4 x i32> _1:%.*, <4 x i32> _2:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssrlni.wu.d(<4 x i32> TMP0, <4 x i32> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4u32 vssrlni_wu_d(v4u32 _1, v4i32 _2) { + return __builtin_lsx_vssrlni_wu_d(_1, _2, 1); + } + // CHECK-LABEL: @vssrlni_du_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vssrlni.du.q(<2 x i64> _1:%.*, <2 x i64> _2:%.*, i32 1) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vssrlni.du.q(<2 x i64> TMP0, <2 x i64> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2u64 vssrlni_du_q(v2u64 _1, v2i64 _2) { + return __builtin_lsx_vssrlni_du_q(_1, _2, 1); + } + // CHECK-LABEL: @vssrlrni_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h(<16 x i8> _1:%.*, <16 x i8> _2:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrni.b.h(<16 x i8> TMP0, <16 x i8> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vssrlrni_b_h(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vssrlrni_b_h(_1, _2, 1); + } + // CHECK-LABEL: @vssrlrni_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrni.h.w(<8 x i16> _1:%.*, <8 x i16> _2:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrni.h.w(<8 x i16> TMP0, <8 x i16> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vssrlrni_h_w(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vssrlrni_h_w(_1, _2, 1); + } + // CHECK-LABEL: 
@vssrlrni_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrni.w.d(<4 x i32> _1:%.*, <4 x i32> _2:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrni.w.d(<4 x i32> TMP0, <4 x i32> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vssrlrni_w_d(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vssrlrni_w_d(_1, _2, 1); + } + // CHECK-LABEL: @vssrlrni_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vssrlrni.d.q(<2 x i64> _1:%.*, <2 x i64> _2:%.*, i32 1) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vssrlrni.d.q(<2 x i64> TMP0, <2 x i64> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vssrlrni_d_q(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vssrlrni_d_q(_1, _2, 1); + } + // CHECK-LABEL: @vssrlrni_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrni.bu.h(<16 x i8> _1:%.*, <16 x i8> _2:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrni.bu.h(<16 x i8> TMP0, <16 x i8> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16u8 vssrlrni_bu_h(v16u8 _1, v16i8 _2) { + return __builtin_lsx_vssrlrni_bu_h(_1, _2, 1); + } + // CHECK-LABEL: @vssrlrni_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrni.hu.w(<8 x i16> _1:%.*, <8 x i16> _2:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrni.hu.w(<8 x i16> TMP0, <8 x i16> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8u16 vssrlrni_hu_w(v8u16 _1, v8i16 _2) { + return __builtin_lsx_vssrlrni_hu_w(_1, _2, 1); + } + // CHECK-LABEL: @vssrlrni_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrni.wu.d(<4 x i32> _1:%.*, <4 x i32> _2:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrni.wu.d(<4 x i32> TMP0, <4 x i32> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4u32 vssrlrni_wu_d(v4u32 _1, v4i32 _2) { + return __builtin_lsx_vssrlrni_wu_d(_1, _2, 1); + } + // CHECK-LABEL: @vssrlrni_du_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vssrlrni.du.q(<2 x i64> _1:%.*, <2 x i64> _2:%.*, i32 1) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: 
TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vssrlrni.du.q(<2 x i64> TMP0, <2 x i64> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2u64 vssrlrni_du_q(v2u64 _1, v2i64 _2) { + return __builtin_lsx_vssrlrni_du_q(_1, _2, 1); + } + // CHECK-LABEL: @vsrani_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsrani.b.h(<16 x i8> _1:%.*, <16 x i8> _2:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsrani.b.h(<16 x i8> TMP0, <16 x i8> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vsrani_b_h(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vsrani_b_h(_1, _2, 1); + } + // CHECK-LABEL: @vsrani_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsrani.h.w(<8 x i16> _1:%.*, <8 x i16> _2:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsrani.h.w(<8 x i16> TMP0, <8 x i16> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vsrani_h_w(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vsrani_h_w(_1, _2, 1); + } + // CHECK-LABEL: @vsrani_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsrani.w.d(<4 x i32> _1:%.*, <4 x i32> _2:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsrani.w.d(<4 x i32> TMP0, <4 x i32> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vsrani_w_d(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vsrani_w_d(_1, _2, 1); + } + // CHECK-LABEL: @vsrani_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsrani.d.q(<2 x i64> _1:%.*, <2 x i64> _2:%.*, i32 1) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsrani.d.q(<2 x i64> TMP0, <2 x i64> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vsrani_d_q(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vsrani_d_q(_1, _2, 1); + } + // CHECK-LABEL: @vsrarni_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h(<16 x i8> _1:%.*, <16 x i8> _2:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vsrarni.b.h(<16 x i8> TMP0, <16 x i8> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 
x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vsrarni_b_h(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vsrarni_b_h(_1, _2, 1); + } + // CHECK-LABEL: @vsrarni_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsrarni.h.w(<8 x i16> _1:%.*, <8 x i16> _2:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vsrarni.h.w(<8 x i16> TMP0, <8 x i16> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vsrarni_h_w(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vsrarni_h_w(_1, _2, 1); + } + // CHECK-LABEL: @vsrarni_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsrarni.w.d(<4 x i32> _1:%.*, <4 x i32> _2:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vsrarni.w.d(<4 x i32> TMP0, <4 x i32> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vsrarni_w_d(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vsrarni_w_d(_1, _2, 1); + } + // CHECK-LABEL: @vsrarni_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsrarni.d.q(<2 x i64> _1:%.*, <2 x i64> _2:%.*, i32 1) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vsrarni.d.q(<2 x i64> TMP0, <2 x i64> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vsrarni_d_q(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vsrarni_d_q(_1, _2, 1); + } + // CHECK-LABEL: @vssrani_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssrani.b.h(<16 x i8> _1:%.*, <16 x i8> _2:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssrani.b.h(<16 x i8> TMP0, <16 x i8> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vssrani_b_h(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vssrani_b_h(_1, _2, 1); + } + // CHECK-LABEL: @vssrani_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssrani.h.w(<8 x i16> _1:%.*, <8 x i16> _2:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssrani.h.w(<8 x i16> TMP0, <8 x i16> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vssrani_h_w(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vssrani_h_w(_1, _2, 1); + } + // CHECK-LABEL: @vssrani_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> 
@llvm.loongarch.lsx.vssrani.w.d(<4 x i32> _1:%.*, <4 x i32> _2:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssrani.w.d(<4 x i32> TMP0, <4 x i32> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vssrani_w_d(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vssrani_w_d(_1, _2, 1); + } + // CHECK-LABEL: @vssrani_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vssrani.d.q(<2 x i64> _1:%.*, <2 x i64> _2:%.*, i32 1) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vssrani.d.q(<2 x i64> TMP0, <2 x i64> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vssrani_d_q(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vssrani_d_q(_1, _2, 1); + } + // CHECK-LABEL: @vssrani_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssrani.bu.h(<16 x i8> _1:%.*, <16 x i8> _2:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssrani.bu.h(<16 x i8> TMP0, <16 x i8> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16u8 vssrani_bu_h(v16u8 _1, v16i8 _2) { + return __builtin_lsx_vssrani_bu_h(_1, _2, 1); + } + // CHECK-LABEL: @vssrani_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssrani.hu.w(<8 x i16> _1:%.*, <8 x i16> _2:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssrani.hu.w(<8 x i16> TMP0, <8 x i16> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8u16 vssrani_hu_w(v8u16 _1, v8i16 _2) { + return __builtin_lsx_vssrani_hu_w(_1, _2, 1); + } + // CHECK-LABEL: @vssrani_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssrani.wu.d(<4 x i32> _1:%.*, <4 x i32> _2:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssrani.wu.d(<4 x i32> TMP0, <4 x i32> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4u32 vssrani_wu_d(v4u32 _1, v4i32 _2) { + return __builtin_lsx_vssrani_wu_d(_1, _2, 1); + } + // CHECK-LABEL: @vssrani_du_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vssrani.du.q(<2 x i64> _1:%.*, <2 x i64> _2:%.*, i32 1) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> 
++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vssrani.du.q(<2 x i64> TMP0, <2 x i64> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2u64 vssrani_du_q(v2u64 _1, v2i64 _2) { + return __builtin_lsx_vssrani_du_q(_1, _2, 1); + } + // CHECK-LABEL: @vssrarni_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h(<16 x i8> _1:%.*, <16 x i8> _2:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssrarni.b.h(<16 x i8> TMP0, <16 x i8> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vssrarni_b_h(v16i8 _1, v16i8 _2) { + return __builtin_lsx_vssrarni_b_h(_1, _2, 1); + } + // CHECK-LABEL: @vssrarni_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssrarni.h.w(<8 x i16> _1:%.*, <8 x i16> _2:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssrarni.h.w(<8 x i16> TMP0, <8 x i16> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vssrarni_h_w(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vssrarni_h_w(_1, _2, 1); + } + // CHECK-LABEL: @vssrarni_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssrarni.w.d(<4 x i32> _1:%.*, <4 x i32> _2:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssrarni.w.d(<4 x i32> TMP0, <4 x i32> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vssrarni_w_d(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vssrarni_w_d(_1, _2, 1); + } + // CHECK-LABEL: @vssrarni_d_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vssrarni.d.q(<2 x i64> _1:%.*, <2 x i64> _2:%.*, i32 1) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vssrarni.d.q(<2 x i64> TMP0, <2 x i64> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vssrarni_d_q(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vssrarni_d_q(_1, _2, 1); + } + // CHECK-LABEL: @vssrarni_bu_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssrarni.bu.h(<16 x i8> _1:%.*, <16 x i8> _2:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssrarni.bu.h(<16 x i8> TMP0, <16 x i8> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16u8 
vssrarni_bu_h(v16u8 _1, v16i8 _2) { + return __builtin_lsx_vssrarni_bu_h(_1, _2, 1); + } + // CHECK-LABEL: @vssrarni_hu_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssrarni.hu.w(<8 x i16> _1:%.*, <8 x i16> _2:%.*, i32 1) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssrarni.hu.w(<8 x i16> TMP0, <8 x i16> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8u16 vssrarni_hu_w(v8u16 _1, v8i16 _2) { + return __builtin_lsx_vssrarni_hu_w(_1, _2, 1); + } + // CHECK-LABEL: @vssrarni_wu_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssrarni.wu.d(<4 x i32> _1:%.*, <4 x i32> _2:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssrarni.wu.d(<4 x i32> TMP0, <4 x i32> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4u32 vssrarni_wu_d(v4u32 _1, v4i32 _2) { + return __builtin_lsx_vssrarni_wu_d(_1, _2, 1); + } + // CHECK-LABEL: @vssrarni_du_q( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vssrarni.du.q(<2 x i64> _1:%.*, <2 x i64> _2:%.*, i32 1) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vssrarni.du.q(<2 x i64> TMP0, <2 x i64> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2u64 vssrarni_du_q(v2u64 _1, v2i64 _2) { + return __builtin_lsx_vssrarni_du_q(_1, _2, 1); + } + // CHECK-LABEL: @vpermi_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vpermi.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*, i32 1) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vpermi.w(<4 x i32> TMP0, <4 x i32> TMP1, i32 1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vpermi_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vpermi_w(_1, _2, 1); +@@ -4660,79 +6390,107 @@ v4i32 vpermi_w(v4i32 _1, v4i32 _2) { + // CHECK-LABEL: @vld( + // CHECK-NEXT: entry: + // CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vld(ptr _1:%.*, i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP1:%.* = bitcast <16 x i8> TMP0 to i128 ++// CHECK-NEXT: ret i128 TMP1 + // + v16i8 vld(void *_1) { return __builtin_lsx_vld(_1, 1); } + // CHECK-LABEL: @vst( + // CHECK-NEXT: entry: +-// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vst(<16 x i8> _1:%.*, ptr _2:%.*, i32 1) ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vst(<16 x i8> TMP0, ptr _2:%.*, i32 1) + // CHECK-NEXT: ret void + // + void vst(v16i8 _1, void *_2) { return __builtin_lsx_vst(_1, _2, 1); } + // CHECK-LABEL: 
@vssrlrn_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrn.b.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssrlrn.b.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vssrlrn_b_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vssrlrn_b_h(_1, _2); + } + // CHECK-LABEL: @vssrlrn_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrn.h.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssrlrn.h.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vssrlrn_h_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vssrlrn_h_w(_1, _2); + } + // CHECK-LABEL: @vssrlrn_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrn.w.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssrlrn.w.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vssrlrn_w_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vssrlrn_w_d(_1, _2); + } + // CHECK-LABEL: @vssrln_b_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssrln.b.h(<8 x i16> _1:%.*, <8 x i16> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vssrln.b.h(<8 x i16> TMP0, <8 x i16> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vssrln_b_h(v8i16 _1, v8i16 _2) { + return __builtin_lsx_vssrln_b_h(_1, _2); + } + // CHECK-LABEL: @vssrln_h_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssrln.h.w(<4 x i32> _1:%.*, <4 x i32> _2:%.*) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP2:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vssrln.h.w(<4 x i32> TMP0, <4 x i32> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <8 x i16> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v8i16 vssrln_h_w(v4i32 _1, v4i32 _2) { + return __builtin_lsx_vssrln_h_w(_1, _2); + } + // CHECK-LABEL: @vssrln_w_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vssrln.w.d(<2 x i64> _1:%.*, <2 x i64> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP2:%.* = tail 
call <4 x i32> @llvm.loongarch.lsx.vssrln.w.d(<2 x i64> TMP0, <2 x i64> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vssrln_w_d(v2i64 _1, v2i64 _2) { + return __builtin_lsx_vssrln_w_d(_1, _2); + } + // CHECK-LABEL: @vorn_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vorn.v(<16 x i8> _1:%.*, <16 x i8> _2:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vorn.v(<16 x i8> TMP0, <16 x i8> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <16 x i8> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v16i8 vorn_v(v16i8 _1, v16i8 _2) { return __builtin_lsx_vorn_v(_1, _2); } + // CHECK-LABEL: @vldi( + // CHECK-NEXT: entry: + // CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vldi(i32 1) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP1:%.* = bitcast <2 x i64> TMP0 to i128 ++// CHECK-NEXT: ret i128 TMP1 + // + v2i64 vldi() { return __builtin_lsx_vldi(1); } + // CHECK-LABEL: @vshuf_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vshuf.b(<16 x i8> _1:%.*, <16 x i8> _2:%.*, <16 x i8> _3:%.*) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP2:%.* = bitcast i128 _3_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP3:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vshuf.b(<16 x i8> TMP0, <16 x i8> TMP1, <16 x i8> TMP2) ++// CHECK-NEXT: TMP4:%.* = bitcast <16 x i8> TMP3 to i128 ++// CHECK-NEXT: ret i128 TMP4 + // + v16i8 vshuf_b(v16i8 _1, v16i8 _2, v16i8 _3) { + return __builtin_lsx_vshuf_b(_1, _2, _3); +@@ -4740,429 +6498,575 @@ v16i8 vshuf_b(v16i8 _1, v16i8 _2, v16i8 _3) { + // CHECK-LABEL: @vldx( + // CHECK-NEXT: entry: + // CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vldx(ptr _1:%.*, i64 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP1:%.* = bitcast <16 x i8> TMP0 to i128 ++// CHECK-NEXT: ret i128 TMP1 + // + v16i8 vldx(void *_1) { return __builtin_lsx_vldx(_1, 1); } + // CHECK-LABEL: @vstx( + // CHECK-NEXT: entry: +-// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstx(<16 x i8> _1:%.*, ptr _2:%.*, i64 1) ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: tail call void @llvm.loongarch.lsx.vstx(<16 x i8> TMP0, ptr _2:%.*, i64 1) + // CHECK-NEXT: ret void + // + void vstx(v16i8 _1, void *_2) { return __builtin_lsx_vstx(_1, _2, 1); } + // CHECK-LABEL: @vextl_qu_du( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vextl.qu.du(<2 x i64> _1:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vextl.qu.du(<2 x i64> TMP0) ++// CHECK-NEXT: TMP2:%.* = bitcast <2 x i64> TMP1 to i128 ++// CHECK-NEXT: ret i128 TMP2 + // + v2u64 vextl_qu_du(v2u64 _1) { return __builtin_lsx_vextl_qu_du(_1); } + // CHECK-LABEL: @bnz_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lsx.bnz.b(<16 x i8> _1:%.*) +-// CHECK-NEXT: ret i32 TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call i32 
@llvm.loongarch.lsx.bnz.b(<16 x i8> TMP0) ++// CHECK-NEXT: ret i32 TMP1 + // + int bnz_b(v16u8 _1) { return __builtin_lsx_bnz_b(_1); } + // CHECK-LABEL: @bnz_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lsx.bnz.d(<2 x i64> _1:%.*) +-// CHECK-NEXT: ret i32 TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = tail call i32 @llvm.loongarch.lsx.bnz.d(<2 x i64> TMP0) ++// CHECK-NEXT: ret i32 TMP1 + // + int bnz_d(v2u64 _1) { return __builtin_lsx_bnz_d(_1); } + // CHECK-LABEL: @bnz_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lsx.bnz.h(<8 x i16> _1:%.*) +-// CHECK-NEXT: ret i32 TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = tail call i32 @llvm.loongarch.lsx.bnz.h(<8 x i16> TMP0) ++// CHECK-NEXT: ret i32 TMP1 + // + int bnz_h(v8u16 _1) { return __builtin_lsx_bnz_h(_1); } + // CHECK-LABEL: @bnz_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lsx.bnz.v(<16 x i8> _1:%.*) +-// CHECK-NEXT: ret i32 TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call i32 @llvm.loongarch.lsx.bnz.v(<16 x i8> TMP0) ++// CHECK-NEXT: ret i32 TMP1 + // + int bnz_v(v16u8 _1) { return __builtin_lsx_bnz_v(_1); } + // CHECK-LABEL: @bnz_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lsx.bnz.w(<4 x i32> _1:%.*) +-// CHECK-NEXT: ret i32 TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = tail call i32 @llvm.loongarch.lsx.bnz.w(<4 x i32> TMP0) ++// CHECK-NEXT: ret i32 TMP1 + // + int bnz_w(v4u32 _1) { return __builtin_lsx_bnz_w(_1); } + // CHECK-LABEL: @bz_b( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lsx.bz.b(<16 x i8> _1:%.*) +-// CHECK-NEXT: ret i32 TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call i32 @llvm.loongarch.lsx.bz.b(<16 x i8> TMP0) ++// CHECK-NEXT: ret i32 TMP1 + // + int bz_b(v16u8 _1) { return __builtin_lsx_bz_b(_1); } + // CHECK-LABEL: @bz_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lsx.bz.d(<2 x i64> _1:%.*) +-// CHECK-NEXT: ret i32 TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x i64> ++// CHECK-NEXT: TMP1:%.* = tail call i32 @llvm.loongarch.lsx.bz.d(<2 x i64> TMP0) ++// CHECK-NEXT: ret i32 TMP1 + // + int bz_d(v2u64 _1) { return __builtin_lsx_bz_d(_1); } + // CHECK-LABEL: @bz_h( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lsx.bz.h(<8 x i16> _1:%.*) +-// CHECK-NEXT: ret i32 TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <8 x i16> ++// CHECK-NEXT: TMP1:%.* = tail call i32 @llvm.loongarch.lsx.bz.h(<8 x i16> TMP0) ++// CHECK-NEXT: ret i32 TMP1 + // + int bz_h(v8u16 _1) { return __builtin_lsx_bz_h(_1); } + // CHECK-LABEL: @bz_v( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lsx.bz.v(<16 x i8> _1:%.*) +-// CHECK-NEXT: ret i32 TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <16 x i8> ++// CHECK-NEXT: TMP1:%.* = tail call i32 @llvm.loongarch.lsx.bz.v(<16 x i8> TMP0) ++// CHECK-NEXT: ret i32 TMP1 + // + int bz_v(v16u8 _1) { return __builtin_lsx_bz_v(_1); } + // CHECK-LABEL: @bz_w( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.lsx.bz.w(<4 x i32> _1:%.*) +-// CHECK-NEXT: 
ret i32 TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x i32> ++// CHECK-NEXT: TMP1:%.* = tail call i32 @llvm.loongarch.lsx.bz.w(<4 x i32> TMP0) ++// CHECK-NEXT: ret i32 TMP1 + // + int bz_w(v4u32 _1) { return __builtin_lsx_bz_w(_1); } + // CHECK-LABEL: @vfcmp_caf_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.caf.d(<2 x double> _1:%.*, <2 x double> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.caf.d(<2 x double> TMP0, <2 x double> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vfcmp_caf_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_caf_d(_1, _2); + } + // CHECK-LABEL: @vfcmp_caf_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.caf.s(<4 x float> _1:%.*, <4 x float> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.caf.s(<4 x float> TMP0, <4 x float> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vfcmp_caf_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_caf_s(_1, _2); + } + // CHECK-LABEL: @vfcmp_ceq_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.ceq.d(<2 x double> _1:%.*, <2 x double> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.ceq.d(<2 x double> TMP0, <2 x double> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vfcmp_ceq_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_ceq_d(_1, _2); + } + // CHECK-LABEL: @vfcmp_ceq_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.ceq.s(<4 x float> _1:%.*, <4 x float> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.ceq.s(<4 x float> TMP0, <4 x float> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vfcmp_ceq_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_ceq_s(_1, _2); + } + // CHECK-LABEL: @vfcmp_cle_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cle.d(<2 x double> _1:%.*, <2 x double> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cle.d(<2 x double> TMP0, <2 x double> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vfcmp_cle_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_cle_d(_1, _2); + } + // 
CHECK-LABEL: @vfcmp_cle_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cle.s(<4 x float> _1:%.*, <4 x float> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cle.s(<4 x float> TMP0, <4 x float> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vfcmp_cle_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_cle_s(_1, _2); + } + // CHECK-LABEL: @vfcmp_clt_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.clt.d(<2 x double> _1:%.*, <2 x double> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.clt.d(<2 x double> TMP0, <2 x double> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vfcmp_clt_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_clt_d(_1, _2); + } + // CHECK-LABEL: @vfcmp_clt_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.clt.s(<4 x float> _1:%.*, <4 x float> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.clt.s(<4 x float> TMP0, <4 x float> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vfcmp_clt_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_clt_s(_1, _2); + } + // CHECK-LABEL: @vfcmp_cne_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cne.d(<2 x double> _1:%.*, <2 x double> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cne.d(<2 x double> TMP0, <2 x double> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vfcmp_cne_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_cne_d(_1, _2); + } + // CHECK-LABEL: @vfcmp_cne_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cne.s(<4 x float> _1:%.*, <4 x float> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cne.s(<4 x float> TMP0, <4 x float> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vfcmp_cne_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_cne_s(_1, _2); + } + // CHECK-LABEL: @vfcmp_cor_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cor.d(<2 x double> _1:%.*, <2 x double> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x 
double> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cor.d(<2 x double> TMP0, <2 x double> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vfcmp_cor_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_cor_d(_1, _2); + } + // CHECK-LABEL: @vfcmp_cor_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cor.s(<4 x float> _1:%.*, <4 x float> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cor.s(<4 x float> TMP0, <4 x float> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vfcmp_cor_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_cor_s(_1, _2); + } + // CHECK-LABEL: @vfcmp_cueq_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cueq.d(<2 x double> _1:%.*, <2 x double> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cueq.d(<2 x double> TMP0, <2 x double> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vfcmp_cueq_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_cueq_d(_1, _2); + } + // CHECK-LABEL: @vfcmp_cueq_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cueq.s(<4 x float> _1:%.*, <4 x float> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cueq.s(<4 x float> TMP0, <4 x float> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vfcmp_cueq_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_cueq_s(_1, _2); + } + // CHECK-LABEL: @vfcmp_cule_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cule.d(<2 x double> _1:%.*, <2 x double> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cule.d(<2 x double> TMP0, <2 x double> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vfcmp_cule_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_cule_d(_1, _2); + } + // CHECK-LABEL: @vfcmp_cule_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cule.s(<4 x float> _1:%.*, <4 x float> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cule.s(<4 x float> TMP0, <4 x float> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 
++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vfcmp_cule_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_cule_s(_1, _2); + } + // CHECK-LABEL: @vfcmp_cult_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cult.d(<2 x double> _1:%.*, <2 x double> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cult.d(<2 x double> TMP0, <2 x double> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vfcmp_cult_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_cult_d(_1, _2); + } + // CHECK-LABEL: @vfcmp_cult_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cult.s(<4 x float> _1:%.*, <4 x float> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cult.s(<4 x float> TMP0, <4 x float> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vfcmp_cult_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_cult_s(_1, _2); + } + // CHECK-LABEL: @vfcmp_cun_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cun.d(<2 x double> _1:%.*, <2 x double> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cun.d(<2 x double> TMP0, <2 x double> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vfcmp_cun_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_cun_d(_1, _2); + } + // CHECK-LABEL: @vfcmp_cune_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cune.d(<2 x double> _1:%.*, <2 x double> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.cune.d(<2 x double> TMP0, <2 x double> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vfcmp_cune_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_cune_d(_1, _2); + } + // CHECK-LABEL: @vfcmp_cune_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cune.s(<4 x float> _1:%.*, <4 x float> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cune.s(<4 x float> TMP0, <4 x float> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vfcmp_cune_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_cune_s(_1, _2); + } + // CHECK-LABEL: @vfcmp_cun_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> 
@llvm.loongarch.lsx.vfcmp.cun.s(<4 x float> _1:%.*, <4 x float> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.cun.s(<4 x float> TMP0, <4 x float> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vfcmp_cun_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_cun_s(_1, _2); + } + // CHECK-LABEL: @vfcmp_saf_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.saf.d(<2 x double> _1:%.*, <2 x double> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.saf.d(<2 x double> TMP0, <2 x double> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vfcmp_saf_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_saf_d(_1, _2); + } + // CHECK-LABEL: @vfcmp_saf_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.saf.s(<4 x float> _1:%.*, <4 x float> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.saf.s(<4 x float> TMP0, <4 x float> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vfcmp_saf_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_saf_s(_1, _2); + } + // CHECK-LABEL: @vfcmp_seq_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.seq.d(<2 x double> _1:%.*, <2 x double> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.seq.d(<2 x double> TMP0, <2 x double> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vfcmp_seq_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_seq_d(_1, _2); + } + // CHECK-LABEL: @vfcmp_seq_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.seq.s(<4 x float> _1:%.*, <4 x float> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.seq.s(<4 x float> TMP0, <4 x float> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vfcmp_seq_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_seq_s(_1, _2); + } + // CHECK-LABEL: @vfcmp_sle_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sle.d(<2 x double> _1:%.*, <2 x double> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP2:%.* 
= tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sle.d(<2 x double> TMP0, <2 x double> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vfcmp_sle_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_sle_d(_1, _2); + } + // CHECK-LABEL: @vfcmp_sle_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sle.s(<4 x float> _1:%.*, <4 x float> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sle.s(<4 x float> TMP0, <4 x float> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vfcmp_sle_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_sle_s(_1, _2); + } + // CHECK-LABEL: @vfcmp_slt_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.slt.d(<2 x double> _1:%.*, <2 x double> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.slt.d(<2 x double> TMP0, <2 x double> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vfcmp_slt_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_slt_d(_1, _2); + } + // CHECK-LABEL: @vfcmp_slt_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.slt.s(<4 x float> _1:%.*, <4 x float> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.slt.s(<4 x float> TMP0, <4 x float> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vfcmp_slt_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_slt_s(_1, _2); + } + // CHECK-LABEL: @vfcmp_sne_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sne.d(<2 x double> _1:%.*, <2 x double> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sne.d(<2 x double> TMP0, <2 x double> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vfcmp_sne_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_sne_d(_1, _2); + } + // CHECK-LABEL: @vfcmp_sne_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sne.s(<4 x float> _1:%.*, <4 x float> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sne.s(<4 x float> TMP0, <4 x float> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vfcmp_sne_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_sne_s(_1, _2); + } 
+ // CHECK-LABEL: @vfcmp_sor_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sor.d(<2 x double> _1:%.*, <2 x double> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sor.d(<2 x double> TMP0, <2 x double> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vfcmp_sor_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_sor_d(_1, _2); + } + // CHECK-LABEL: @vfcmp_sor_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sor.s(<4 x float> _1:%.*, <4 x float> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sor.s(<4 x float> TMP0, <4 x float> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vfcmp_sor_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_sor_s(_1, _2); + } + // CHECK-LABEL: @vfcmp_sueq_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sueq.d(<2 x double> _1:%.*, <2 x double> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sueq.d(<2 x double> TMP0, <2 x double> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vfcmp_sueq_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_sueq_d(_1, _2); + } + // CHECK-LABEL: @vfcmp_sueq_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sueq.s(<4 x float> _1:%.*, <4 x float> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sueq.s(<4 x float> TMP0, <4 x float> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vfcmp_sueq_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_sueq_s(_1, _2); + } + // CHECK-LABEL: @vfcmp_sule_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sule.d(<2 x double> _1:%.*, <2 x double> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sule.d(<2 x double> TMP0, <2 x double> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vfcmp_sule_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_sule_d(_1, _2); + } + // CHECK-LABEL: @vfcmp_sule_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sule.s(<4 x float> _1:%.*, <4 x float> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 
_1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sule.s(<4 x float> TMP0, <4 x float> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vfcmp_sule_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_sule_s(_1, _2); + } + // CHECK-LABEL: @vfcmp_sult_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sult.d(<2 x double> _1:%.*, <2 x double> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sult.d(<2 x double> TMP0, <2 x double> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vfcmp_sult_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_sult_d(_1, _2); + } + // CHECK-LABEL: @vfcmp_sult_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sult.s(<4 x float> _1:%.*, <4 x float> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sult.s(<4 x float> TMP0, <4 x float> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vfcmp_sult_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_sult_s(_1, _2); + } + // CHECK-LABEL: @vfcmp_sun_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sun.d(<2 x double> _1:%.*, <2 x double> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sun.d(<2 x double> TMP0, <2 x double> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vfcmp_sun_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_sun_d(_1, _2); + } + // CHECK-LABEL: @vfcmp_sune_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sune.d(<2 x double> _1:%.*, <2 x double> _2:%.*) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <2 x double> ++// CHECK-NEXT: TMP2:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vfcmp.sune.d(<2 x double> TMP0, <2 x double> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <2 x i64> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v2i64 vfcmp_sune_d(v2f64 _1, v2f64 _2) { + return __builtin_lsx_vfcmp_sune_d(_1, _2); + } + // CHECK-LABEL: @vfcmp_sune_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sune.s(<4 x float> _1:%.*, <4 x float> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sune.s(<4 x float> TMP0, <4 x float> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast 
<4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vfcmp_sune_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_sune_s(_1, _2); + } + // CHECK-LABEL: @vfcmp_sun_s( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sun.s(<4 x float> _1:%.*, <4 x float> _2:%.*) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP0:%.* = bitcast i128 _1_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP1:%.* = bitcast i128 _2_COERCE:%.* to <4 x float> ++// CHECK-NEXT: TMP2:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vfcmp.sun.s(<4 x float> TMP0, <4 x float> TMP1) ++// CHECK-NEXT: TMP3:%.* = bitcast <4 x i32> TMP2 to i128 ++// CHECK-NEXT: ret i128 TMP3 + // + v4i32 vfcmp_sun_s(v4f32 _1, v4f32 _2) { + return __builtin_lsx_vfcmp_sun_s(_1, _2); +@@ -5170,24 +7074,28 @@ v4i32 vfcmp_sun_s(v4f32 _1, v4f32 _2) { + // CHECK-LABEL: @vrepli_b( + // CHECK-NEXT: entry: + // CHECK-NEXT: TMP0:%.* = tail call <16 x i8> @llvm.loongarch.lsx.vrepli.b(i32 1) +-// CHECK-NEXT: ret <16 x i8> TMP0 ++// CHECK-NEXT: TMP1:%.* = bitcast <16 x i8> TMP0 to i128 ++// CHECK-NEXT: ret i128 TMP1 + // + v16i8 vrepli_b() { return __builtin_lsx_vrepli_b(1); } + // CHECK-LABEL: @vrepli_d( + // CHECK-NEXT: entry: + // CHECK-NEXT: TMP0:%.* = tail call <2 x i64> @llvm.loongarch.lsx.vrepli.d(i32 1) +-// CHECK-NEXT: ret <2 x i64> TMP0 ++// CHECK-NEXT: TMP1:%.* = bitcast <2 x i64> TMP0 to i128 ++// CHECK-NEXT: ret i128 TMP1 + // + v2i64 vrepli_d() { return __builtin_lsx_vrepli_d(1); } + // CHECK-LABEL: @vrepli_h( + // CHECK-NEXT: entry: + // CHECK-NEXT: TMP0:%.* = tail call <8 x i16> @llvm.loongarch.lsx.vrepli.h(i32 1) +-// CHECK-NEXT: ret <8 x i16> TMP0 ++// CHECK-NEXT: TMP1:%.* = bitcast <8 x i16> TMP0 to i128 ++// CHECK-NEXT: ret i128 TMP1 + // + v8i16 vrepli_h() { return __builtin_lsx_vrepli_h(1); } + // CHECK-LABEL: @vrepli_w( + // CHECK-NEXT: entry: + // CHECK-NEXT: TMP0:%.* = tail call <4 x i32> @llvm.loongarch.lsx.vrepli.w(i32 1) +-// CHECK-NEXT: ret <4 x i32> TMP0 ++// CHECK-NEXT: TMP1:%.* = bitcast <4 x i32> TMP0 to i128 ++// CHECK-NEXT: ret i128 TMP1 + // + v4i32 vrepli_w() { return __builtin_lsx_vrepli_w(1); } +-- +2.20.1 +
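The test updates above reflect the new LSX calling convention: 128-bit vector arguments and results are passed through an i128 coercion. As a rough illustration only (not part of the packaged patch), the comparison builtins exercised by these CHECK lines can be used from C/C++ as below; the v2f64/v2i64 typedefs and the -mlsx build flag are assumptions that mirror what the test file itself appears to rely on.

typedef double v2f64 __attribute__((vector_size(16), aligned(16)));    // assumed test-style typedef
typedef long long v2i64 __attribute__((vector_size(16), aligned(16))); // assumed test-style typedef

// Returns the lane mask produced by the vfcmp.cor.d (compare-ordered) builtin
// checked above; with LSX enabled, the generated IR should show the
// i128 <-> vector bitcasts that the updated CHECK-NEXT lines expect.
v2i64 ordered_mask(v2f64 a, v2f64 b) {
  return __builtin_lsx_vfcmp_cor_d(a, b);
}

Compiling with something like `clang --target=loongarch64 -mlsx -O2 -S -emit-llvm` should reproduce the bitcast/call/bitcast shape verified by the new checks.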
View file
_service:tar_scm:0009-Backport-LoongArch-improve-the-support-for-compiler-rt-and-bugfix.patch
Added
@@ -0,0 +1,639 @@ +From 2ff5a2bd0a7d14c675cbc5f0e11bf6916b960b20 Mon Sep 17 00:00:00 2001 +From: Ami-zhang <96056515+Ami-zhang@users.noreply.github.com> +Date: Thu, 28 Sep 2023 15:20:27 +0800 +Subject: PATCH 1/9 Driver Support -fsanitize=cfi-icall on loongarch64 + (#67310) + +(cherry picked from commit 55accc82bec48acae769b086ad9a5dc29da77f02) +--- + clang/lib/Driver/ToolChain.cpp | 3 ++- + clang/test/Driver/fsanitize.c | 1 + + 2 files changed, 3 insertions(+), 1 deletion(-) + +diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp +index 0146d8af3549..69811c095594 100644 +--- a/clang/lib/Driver/ToolChain.cpp ++++ b/clang/lib/Driver/ToolChain.cpp +@@ -1273,7 +1273,8 @@ SanitizerMask ToolChain::getSupportedSanitizers() const { + if (getTriple().getArch() == llvm::Triple::x86 || + getTriple().getArch() == llvm::Triple::x86_64 || + getTriple().getArch() == llvm::Triple::arm || getTriple().isWasm() || +- getTriple().isAArch64() || getTriple().isRISCV()) ++ getTriple().isAArch64() || getTriple().isRISCV() || ++ getTriple().isLoongArch64()) + Res |= SanitizerKind::CFIICall; + if (getTriple().getArch() == llvm::Triple::x86_64 || + getTriple().isAArch64(64) || getTriple().isRISCV()) +diff --git a/clang/test/Driver/fsanitize.c b/clang/test/Driver/fsanitize.c +index 9442f6b91471..4a525d75ea11 100644 +--- a/clang/test/Driver/fsanitize.c ++++ b/clang/test/Driver/fsanitize.c +@@ -600,6 +600,7 @@ + // RUN: %clang --target=aarch64_be -fvisibility=hidden -fsanitize=cfi -flto -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-CFI + // RUN: %clang --target=riscv32 -fvisibility=hidden -fsanitize=cfi -flto -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-CFI + // RUN: %clang --target=riscv64 -fvisibility=hidden -fsanitize=cfi -flto -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-CFI ++// RUN: %clang --target=loongarch64 -fvisibility=hidden -fsanitize=cfi -flto -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-CFI + // CHECK-CFI: -emit-llvm-bc{{.*}}-fsanitize=cfi-derived-cast,cfi-icall,cfi-mfcall,cfi-unrelated-cast,cfi-nvcall,cfi-vcall + // CHECK-CFI-NOMFCALL: -emit-llvm-bc{{.*}}-fsanitize=cfi-derived-cast,cfi-icall,cfi-unrelated-cast,cfi-nvcall,cfi-vcall + // CHECK-CFI-DCAST: -emit-llvm-bc{{.*}}-fsanitize=cfi-derived-cast +-- +2.20.1 + + +From acfb50b03b0be3eda5282f26bad34ffc18595b30 Mon Sep 17 00:00:00 2001 +From: Nathan Chancellor <nathan@kernel.org> +Date: Fri, 8 Sep 2023 10:54:35 -0700 +Subject: PATCH 2/9 ClangLoongArch Generate _mcount instead of mcount + (#65657) + +When building the LoongArch Linux kernel without +`CONFIG_DYNAMIC_FTRACE`, the build fails to link because the mcount +symbol is `mcount`, not `_mcount` like GCC generates and the kernel +expects: + +``` +ld.lld: error: undefined symbol: mcount +>>> referenced by version.c +>>> init/version.o:(early_hostname) in archive vmlinux.a +>>> referenced by do_mounts.c +>>> init/do_mounts.o:(rootfs_init_fs_context) in archive vmlinux.a +>>> referenced by main.c +>>> init/main.o:(__traceiter_initcall_level) in archive vmlinux.a +>>> referenced 97011 more times +>>> did you mean: _mcount +>>> defined in: vmlinux.a(arch/loongarch/kernel/mcount.o) +``` + +Set `MCountName` in `LoongArchTargetInfo` to `_mcount`, which resolves +the build failure. 
+ +(cherry picked from commit cc2b09bee017147527e7bd1eb5272f4f70a7b900) +--- + clang/lib/Basic/Targets/LoongArch.h | 1 + + clang/test/CodeGen/mcount.c | 2 ++ + 2 files changed, 3 insertions(+) + +diff --git a/clang/lib/Basic/Targets/LoongArch.h b/clang/lib/Basic/Targets/LoongArch.h +index 8f4150b2539d..3313102492cb 100644 +--- a/clang/lib/Basic/Targets/LoongArch.h ++++ b/clang/lib/Basic/Targets/LoongArch.h +@@ -40,6 +40,7 @@ public: + LongDoubleWidth = 128; + LongDoubleAlign = 128; + LongDoubleFormat = &llvm::APFloat::IEEEquad(); ++ MCountName = "_mcount"; + SuitableAlign = 128; + WCharType = SignedInt; + WIntType = UnsignedInt; +diff --git a/clang/test/CodeGen/mcount.c b/clang/test/CodeGen/mcount.c +index 8f994ab4e754..bdd609c1dfc5 100644 +--- a/clang/test/CodeGen/mcount.c ++++ b/clang/test/CodeGen/mcount.c +@@ -7,6 +7,8 @@ + // RUN: %clang_cc1 -pg -triple x86_64-netbsd -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK-DOUBLE-PREFIXED,NO-MCOUNT1 %s + // RUN: %clang_cc1 -pg -triple arm-netbsd-eabi -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK-DOUBLE-PREFIXED,NO-MCOUNT1 %s + // RUN: %clang_cc1 -pg -triple aarch64-netbsd -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK-DOUBLE-PREFIXED,NO-MCOUNT1 %s ++// RUN: %clang_cc1 -pg -triple loongarch32 -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK-PREFIXED,NO-MCOUNT1 %s ++// RUN: %clang_cc1 -pg -triple loongarch64 -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK-PREFIXED,NO-MCOUNT1 %s + // RUN: %clang_cc1 -pg -triple mips-netbsd -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK-DOUBLE-PREFIXED,NO-MCOUNT1 %s + // RUN: %clang_cc1 -pg -triple mips-unknown-gnu-linux -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK-PREFIXED,NO-MCOUNT1 %s + // RUN: %clang_cc1 -pg -triple mipsel-unknown-gnu-linux -emit-llvm -o - %s | FileCheck -check-prefixes=CHECK-PREFIXED,NO-MCOUNT1 %s +-- +2.20.1 + + +From a841576d7a53d3d8fd61aa854af7d9c2dd204536 Mon Sep 17 00:00:00 2001 +From: Weining Lu <luweining@loongson.cn> +Date: Thu, 26 Oct 2023 11:50:28 +0800 +Subject: PATCH 3/9 LoongArchtest Add some ABI regression tests for empty + struct. NFC + +How empty structs (not as fields of container struct) are passed in C++ +is not explicitly documented in psABI. This patch adds some tests +showing the current handing of clang. Some of the results are different +from gcc. Following patch(es) will try to fix the mismatch. + +(cherry picked from commit 8149066fa532d82ff62a0629d5a9fab6bd4da768) +--- + .../LoongArch/abi-lp64d-empty-structs.c | 53 +++++++++++++++++++ + 1 file changed, 53 insertions(+) + +diff --git a/clang/test/CodeGen/LoongArch/abi-lp64d-empty-structs.c b/clang/test/CodeGen/LoongArch/abi-lp64d-empty-structs.c +index fb90bf556c19..d0daafac336e 100644 +--- a/clang/test/CodeGen/LoongArch/abi-lp64d-empty-structs.c ++++ b/clang/test/CodeGen/LoongArch/abi-lp64d-empty-structs.c +@@ -81,9 +81,62 @@ struct s8 test_s8(struct s8 a) { + return a; + } + ++/// Note: Below tests check how empty structs are passed while above tests check ++/// empty structs as fields of container struct are ignored when flattening ++/// structs to examine whether the container structs can be passed via FARs. 
++ + // CHECK-C: define{{.*}} void @test_s9() + // CHECK-CXX: define{{.*}} i64 @_Z7test_s92s9(i64 {{.*}}) + struct s9 { struct empty e; }; + struct s9 test_s9(struct s9 a) { + return a; + } ++ ++// CHECK-C: define{{.*}} void @test_s10() ++// CHECK-CXX: define{{.*}} void @_Z8test_s103s10() ++struct s10 { }; ++struct s10 test_s10(struct s10 a) { ++ return a; ++} ++ ++// CHECK-C: define{{.*}} void @test_s11() ++// CHECK-CXX: define{{.*}} i64 @_Z8test_s113s11(i64 {{.*}}) ++struct s11 { struct { } s; }; ++struct s11 test_s11(struct s11 a) { ++ return a; ++} ++ ++// CHECK-C: define{{.*}} void @test_s12() ++// CHECK-CXX: define{{.*}} void @_Z8test_s123s12() ++struct s12 { int i0; }; ++struct s12 test_s12(struct s12 a) { ++ return a; ++} ++ ++// CHECK-C: define{{.*}} void @test_s13() ++// CHECK-CXX: define{{.*}} void @_Z8test_s133s13() ++struct s13 { struct { } s0; }; ++struct s13 test_s13(struct s13 a) { ++ return a; ++} ++ ++// CHECK-C: define{{.*}} void @test_s14() ++// CHECK-CXX: define{{.*}} i64 @_Z8test_s143s14(i64 {{.*}}) ++struct s14 { struct { } s1; }; ++struct s14 test_s14(struct s14 a) { ++ return a; ++} ++ ++// CHECK-C: define{{.*}} void @test_s15() ++// CHECK-CXX: define{{.*}} void @_Z8test_s153s15() ++struct s15 { int : 0; }; ++struct s15 test_s15(struct s15 a) { ++ return a; ++} ++ ++// CHECK-C: define{{.*}} void @test_s16() ++// CHECK-CXX: define{{.*}} void @_Z8test_s163s16() ++struct s16 { int : 1; }; ++struct s16 test_s16(struct s16 a) { ++ return a; ++} +-- +2.20.1 + + +From 6248fa0fc405952a8b907624c27b2dd1ee86a962 Mon Sep 17 00:00:00 2001 +From: Lu Weining <luweining@loongson.cn> +Date: Tue, 31 Oct 2023 21:18:06 +0800 +Subject: PATCH 4/9 LoongArch Fix ABI mismatch with gcc/g++ about empty + structs passing (#70320) + +How empty structs (not as fields of container struct) are passed in C++ +is not explicitly documented in psABI. However, this patch fixes the +mismatch with g++. + +Note that the unnamed bitfield case `struct { int : 1; }` in C is also +fixed. Previously clang regards it as an empty struct and then ignores +it when passing. Now size of the struct is counted; since it's size is +not 0, clang will not ignore it even in C. + +While https://reviews.llvm.org/D156116 fixed the handling of empty +struct when considering eligibility of the container struct for the FP +calling convention ('flattening'), this patch fixes the handling of +passing the empty struct itself. + +Fix https://github.com/llvm/llvm-project/issues/70319 + +(cherry picked from commit 9ca6bf3fb7b7df373723b3275730f101f9ff816b) +--- + clang/lib/CodeGen/Targets/LoongArch.cpp | 10 ++++++---- + clang/test/CodeGen/LoongArch/abi-lp64d-empty-structs.c | 8 ++++---- + 2 files changed, 10 insertions(+), 8 deletions(-) + +diff --git a/clang/lib/CodeGen/Targets/LoongArch.cpp b/clang/lib/CodeGen/Targets/LoongArch.cpp +index 7483bf6d6d1e..bc508a99da9c 100644 +--- a/clang/lib/CodeGen/Targets/LoongArch.cpp ++++ b/clang/lib/CodeGen/Targets/LoongArch.cpp +@@ -308,12 +308,14 @@ ABIArgInfo LoongArchABIInfo::classifyArgumentType(QualType Ty, bool IsFixed, + CGCXXABI::RAA_DirectInMemory); + } + +- // Ignore empty structs/unions. +- if (isEmptyRecord(getContext(), Ty, true)) +- return ABIArgInfo::getIgnore(); +- + uint64_t Size = getContext().getTypeSize(Ty); + ++ // Ignore empty struct or union whose size is zero, e.g. `struct { }` in C or ++ // `struct { int a0; }` in C++. In C++, `struct { }` is empty but it's size ++ // is 1 byte and g++ doesn't ignore it; clang++ matches this behaviour. 
++ if (isEmptyRecord(getContext(), Ty, true) && Size == 0) ++ return ABIArgInfo::getIgnore(); ++ + // Pass floating point values via FARs if possible. + if (IsFixed && Ty->isFloatingType() && !Ty->isComplexType() && + FRLen >= Size && FARsLeft) { +diff --git a/clang/test/CodeGen/LoongArch/abi-lp64d-empty-structs.c b/clang/test/CodeGen/LoongArch/abi-lp64d-empty-structs.c +index d0daafac336e..281b7b15841a 100644 +--- a/clang/test/CodeGen/LoongArch/abi-lp64d-empty-structs.c ++++ b/clang/test/CodeGen/LoongArch/abi-lp64d-empty-structs.c +@@ -93,7 +93,7 @@ struct s9 test_s9(struct s9 a) { + } + + // CHECK-C: define{{.*}} void @test_s10() +-// CHECK-CXX: define{{.*}} void @_Z8test_s103s10() ++// CHECK-CXX: define{{.*}} i64 @_Z8test_s103s10(i64 {{.*}}) + struct s10 { }; + struct s10 test_s10(struct s10 a) { + return a; +@@ -128,14 +128,14 @@ struct s14 test_s14(struct s14 a) { + } + + // CHECK-C: define{{.*}} void @test_s15() +-// CHECK-CXX: define{{.*}} void @_Z8test_s153s15() ++// CHECK-CXX: define{{.*}} i64 @_Z8test_s153s15(i64 {{.*}}) + struct s15 { int : 0; }; + struct s15 test_s15(struct s15 a) { + return a; + } + +-// CHECK-C: define{{.*}} void @test_s16() +-// CHECK-CXX: define{{.*}} void @_Z8test_s163s16() ++// CHECK-C: define{{.*}} i64 @test_s16(i64 {{.*}}) ++// CHECK-CXX: define{{.*}} i64 @_Z8test_s163s16(i64 {{.*}}) + struct s16 { int : 1; }; + struct s16 test_s16(struct s16 a) { + return a; +-- +2.20.1 + + +From 028d0d88cd73c724f954577dc90cbbc2873a6832 Mon Sep 17 00:00:00 2001 +From: Weining Lu <luweining@loongson.cn> +Date: Thu, 2 Nov 2023 09:29:43 +0800 +Subject: PATCH 5/9 LoongArch Pre-commit test for issue #70890 + +(cherry picked from commit 749083b91f31f370cf64831d3e7e6215b6d51442) +--- + .../LoongArch/abi-lp64d-empty-unions.c | 26 +++++++++++++++++++ + 1 file changed, 26 insertions(+) + create mode 100644 clang/test/CodeGen/LoongArch/abi-lp64d-empty-unions.c + +diff --git a/clang/test/CodeGen/LoongArch/abi-lp64d-empty-unions.c b/clang/test/CodeGen/LoongArch/abi-lp64d-empty-unions.c +new file mode 100644 +index 000000000000..b0607425336e +--- /dev/null ++++ b/clang/test/CodeGen/LoongArch/abi-lp64d-empty-unions.c +@@ -0,0 +1,26 @@ ++// RUN: %clang_cc1 -triple loongarch64 -target-feature +f -target-feature +d -target-abi lp64d -emit-llvm %s -o - | \ ++// RUN: FileCheck --check-prefix=CHECK-C %s ++// RUN: %clang_cc1 -triple loongarch64 -target-feature +f -target-feature +d -target-abi lp64d -emit-llvm %s -o - -x c++ | \ ++// RUN: FileCheck --check-prefix=CHECK-CXX %s ++ ++#include <stdint.h> ++ ++// CHECK-C: define{{.*}} void @test1() ++// CHECK-CXX: define{{.*}} i64 @_Z5test12u1(i64{{^,*}}) ++union u1 { }; ++union u1 test1(union u1 a) { ++ return a; ++} ++ ++struct s1 { ++ union u1 u; ++ int i; ++ float f; ++}; ++ ++// CHECK-C: define{{.*}} { i32, float } @test2(i32{{^,*}}, float{{^,*}}) ++/// FIXME: This doesn't match g++. ++// CHECK-CXX: define{{.*}} { i32, float } @_Z5test22s1(i32{{^,*}}, float{{^,*}}) ++struct s1 test2(struct s1 a) { ++ return a; ++} +-- +2.20.1 + + +From 8c4371c0e53635a23852d0dc7025b4c48495277b Mon Sep 17 00:00:00 2001 +From: Lu Weining <luweining@loongson.cn> +Date: Sat, 4 Nov 2023 10:04:37 +0800 +Subject: PATCH 6/9 LoongArch Fix ABI mismatch with g++ when handling empty + unions (#71025) + +In g++, empty unions are not ignored like empty structs when flattening +structs to examine whether the structs can be passed via FARs in C++. +This patch aligns clang++ with g++. + +Fix https://github.com/llvm/llvm-project/issues/70890. 
+ +(cherry picked from commit 4253fdc2c462da61cc0deb74a43265665720c828) +--- + clang/lib/CodeGen/Targets/LoongArch.cpp | 7 ++++--- + clang/test/CodeGen/LoongArch/abi-lp64d-empty-structs.c | 2 +- + clang/test/CodeGen/LoongArch/abi-lp64d-empty-unions.c | 3 +-- + 3 files changed, 6 insertions(+), 6 deletions(-) + +diff --git a/clang/lib/CodeGen/Targets/LoongArch.cpp b/clang/lib/CodeGen/Targets/LoongArch.cpp +index bc508a99da9c..63b9a1fdb988 100644 +--- a/clang/lib/CodeGen/Targets/LoongArch.cpp ++++ b/clang/lib/CodeGen/Targets/LoongArch.cpp +@@ -170,10 +170,11 @@ bool LoongArchABIInfo::detectFARsEligibleStructHelper( + // copy constructor are not eligible for the FP calling convention. + if (getRecordArgABI(Ty, CGT.getCXXABI())) + return false; +- if (isEmptyRecord(getContext(), Ty, true, true)) +- return true; + const RecordDecl *RD = RTy->getDecl(); +- // Unions aren't eligible unless they're empty (which is caught above). ++ if (isEmptyRecord(getContext(), Ty, true, true) && ++ (!RD->isUnion() || !isa<CXXRecordDecl>(RD))) ++ return true; ++ // Unions aren't eligible unless they're empty in C (which is caught above). + if (RD->isUnion()) + return false; + const ASTRecordLayout &Layout = getContext().getASTRecordLayout(RD); +diff --git a/clang/test/CodeGen/LoongArch/abi-lp64d-empty-structs.c b/clang/test/CodeGen/LoongArch/abi-lp64d-empty-structs.c +index 281b7b15841a..2f7596f0ebdc 100644 +--- a/clang/test/CodeGen/LoongArch/abi-lp64d-empty-structs.c ++++ b/clang/test/CodeGen/LoongArch/abi-lp64d-empty-structs.c +@@ -3,7 +3,7 @@ + // RUN: %clang_cc1 -triple loongarch64 -target-feature +f -target-feature +d -target-abi lp64d -emit-llvm %s -o - -x c++ | \ + // RUN: FileCheck --check-prefix=CHECK-CXX %s + +-// Fields containing empty structs or unions are ignored when flattening ++// Fields containing empty structs are ignored when flattening + // structs to examine whether the structs can be passed via FARs, even in C++. + // But there is an exception that non-zero-length array of empty structures are + // not ignored in C++. These rules are not documented in psABI <https://www.github.com/loongson/la-abi-specs> +diff --git a/clang/test/CodeGen/LoongArch/abi-lp64d-empty-unions.c b/clang/test/CodeGen/LoongArch/abi-lp64d-empty-unions.c +index b0607425336e..363e37efb646 100644 +--- a/clang/test/CodeGen/LoongArch/abi-lp64d-empty-unions.c ++++ b/clang/test/CodeGen/LoongArch/abi-lp64d-empty-unions.c +@@ -19,8 +19,7 @@ struct s1 { + }; + + // CHECK-C: define{{.*}} { i32, float } @test2(i32{{^,*}}, float{{^,*}}) +-/// FIXME: This doesn't match g++. +-// CHECK-CXX: define{{.*}} { i32, float } @_Z5test22s1(i32{{^,*}}, float{{^,*}}) ++// CHECK-CXX: define{{.*}} 2 x i64 @_Z5test22s1(2 x i64{{^,*}}) + struct s1 test2(struct s1 a) { + return a; + } +-- +2.20.1 + + +From 8e855955a009ec398b9f7da88e980dae9d20c420 Mon Sep 17 00:00:00 2001 +From: Fangrui Song <i@maskray.me> +Date: Tue, 14 Nov 2023 00:43:40 -0800 +Subject: PATCH 7/9 Driver Default LoongArch to + -fno-direct-access-external-data for non-PIC (#72221) + +For -fno-pic, if an extern variable is defined in a DSO, a copy +relocation will be needed. However, loongarch*-linux does not and will +not support copy relocations. + +Change Driver to default to -fno-direct-access-external-data for +LoongArch && non-PIC. +Keep Frontend conditions unchanged (-fdirect-access-external-data || +-fno-direct-access-external-data && PIC>0 => direct access). 
+ +Fix #71645 + +(cherry picked from commit 47eeee297775347cbdb7624d6a766c2a3eec4a59) +--- + clang/lib/Driver/ToolChains/Clang.cpp | 7 ++++++- + clang/test/Driver/fdirect-access-external-data.c | 6 ++++++ + 2 files changed, 12 insertions(+), 1 deletion(-) + +diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp +index 6b5930990f11..b21aeaee7f5a 100644 +--- a/clang/lib/Driver/ToolChains/Clang.cpp ++++ b/clang/lib/Driver/ToolChains/Clang.cpp +@@ -5632,10 +5632,15 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, + // defaults to -fno-direct-access-external-data. Pass the option if different + // from the default. + if (Arg *A = Args.getLastArg(options::OPT_fdirect_access_external_data, +- options::OPT_fno_direct_access_external_data)) ++ options::OPT_fno_direct_access_external_data)) { + if (A->getOption().matches(options::OPT_fdirect_access_external_data) != + (PICLevel == 0)) + A->render(Args, CmdArgs); ++ } else if (PICLevel == 0 && Triple.isLoongArch()) { ++ // Some targets default to -fno-direct-access-external-data even for ++ // -fno-pic. ++ CmdArgs.push_back("-fno-direct-access-external-data"); ++ } + + if (Args.hasFlag(options::OPT_fno_plt, options::OPT_fplt, false)) { + CmdArgs.push_back("-fno-plt"); +diff --git a/clang/test/Driver/fdirect-access-external-data.c b/clang/test/Driver/fdirect-access-external-data.c +index f132b1b088af..a6da776e6977 100644 +--- a/clang/test/Driver/fdirect-access-external-data.c ++++ b/clang/test/Driver/fdirect-access-external-data.c +@@ -9,6 +9,12 @@ + // RUN: %clang -### -c -target aarch64 %s -fpic 2>&1 | FileCheck %s --check-prefix=DEFAULT + // RUN: %clang -### -c -target aarch64 %s -fpic -fdirect-access-external-data 2>&1 | FileCheck %s --check-prefix=DIRECT + ++/// loongarch* targets default to -fno-direct-access-external-data even for -fno-pic. ++// RUN: %clang -### -c --target=loongarch64 -fno-pic %s 2>&1 | FileCheck %s --check-prefix=INDIRECT ++// RUN: %clang -### -c --target=loongarch64 -fpie %s 2>&1 | FileCheck %s --check-prefix=DEFAULT ++// RUN: %clang -### -c --target=loongarch32 -fno-pic -fdirect-access-external-data %s 2>&1 | FileCheck %s --check-prefix=DEFAULT ++// RUN: %clang -### -c --target=loongarch32 -fpie -fdirect-access-external-data %s 2>&1 | FileCheck %s --check-prefix=DIRECT ++ + // DEFAULT-NOT: direct-access-external-data" + // DIRECT: "-fdirect-access-external-data" + // INDIRECT: "-fno-direct-access-external-data" +-- +2.20.1 + + +From 29409970a5c68e20022a05457127102a66abfead Mon Sep 17 00:00:00 2001 +From: wanglei <wanglei@loongson.cn> +Date: Tue, 5 Mar 2024 19:44:28 +0800 +Subject: PATCH 8/9 ClangLoongArch Precommit test for fix wrong return + value type of __iocsrrd_h. 
NFC + +(cherry picked from commit aeda1a6e800e0dd6c91c0332b4db95094ad5b301) +(cherry picked from commit a9ba36c7e7d7fa076f201843e3b826b6c6d7f5ef) +--- + clang/test/CodeGen/LoongArch/intrinsic-la32.c | 29 ++++++++++++++----- + clang/test/CodeGen/LoongArch/intrinsic-la64.c | 21 ++++++++++++-- + 2 files changed, 40 insertions(+), 10 deletions(-) + +diff --git a/clang/test/CodeGen/LoongArch/intrinsic-la32.c b/clang/test/CodeGen/LoongArch/intrinsic-la32.c +index 93d54f511a9c..6a8d99880be3 100644 +--- a/clang/test/CodeGen/LoongArch/intrinsic-la32.c ++++ b/clang/test/CodeGen/LoongArch/intrinsic-la32.c +@@ -169,8 +169,8 @@ unsigned int cpucfg(unsigned int a) { + + // LA32-LABEL: @rdtime( + // LA32-NEXT: entry: +-// LA32-NEXT: TMP0:%.* = tail call { i32, i32 } asm sideeffect "rdtimeh.w $0, $1\0A\09", "=&r,=&r"() #ATTR1:0-9+, !srcloc !2 +-// LA32-NEXT: TMP1:%.* = tail call { i32, i32 } asm sideeffect "rdtimel.w $0, $1\0A\09", "=&r,=&r"() #ATTR1, !srcloc !3 ++// LA32-NEXT: TMP0:%.* = tail call { i32, i32 } asm sideeffect "rdtimeh.w $0, $1\0A\09", "=&r,=&r"() #ATTR1:0-9+, !srcloc META2:!0-9+ ++// LA32-NEXT: TMP1:%.* = tail call { i32, i32 } asm sideeffect "rdtimel.w $0, $1\0A\09", "=&r,=&r"() #ATTR1, !srcloc META3:!0-9+ + // LA32-NEXT: ret void + // + void rdtime() { +@@ -201,13 +201,28 @@ void loongarch_movgr2fcsr(int a) { + __builtin_loongarch_movgr2fcsr(1, a); + } + +-// CHECK-LABEL: @cacop_w( +-// CHECK-NEXT: entry: +-// CHECK-NEXT: tail call void @llvm.loongarch.cacop.w(i32 1, i32 A:%.*, i32 1024) +-// CHECK-NEXT: tail call void @llvm.loongarch.cacop.w(i32 1, i32 A, i32 1024) +-// CHECK-NEXT: ret void ++// LA32-LABEL: @cacop_w( ++// LA32-NEXT: entry: ++// LA32-NEXT: tail call void @llvm.loongarch.cacop.w(i32 1, i32 A:%.*, i32 1024) ++// LA32-NEXT: tail call void @llvm.loongarch.cacop.w(i32 1, i32 A, i32 1024) ++// LA32-NEXT: ret void + // + void cacop_w(unsigned long int a) { + __cacop_w(1, a, 1024); + __builtin_loongarch_cacop_w(1, a, 1024); + } ++ ++// LA32-LABEL: @iocsrrd_h_result( ++// LA32-NEXT: entry: ++// LA32-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.iocsrrd.h(i32 A:%.*) ++// LA32-NEXT: TMP1:%.* = tail call i32 @llvm.loongarch.iocsrrd.h(i32 A) ++// LA32-NEXT: CONV2:%.* = and i32 TMP0, 255 ++// LA32-NEXT: ADD:%.* = add i32 TMP1, CONV2 ++// LA32-NEXT: CONV4:%.* = trunc i32 ADD to i16 ++// LA32-NEXT: ret i16 CONV4 ++// ++unsigned short iocsrrd_h_result(unsigned int a) { ++ unsigned short b = __iocsrrd_h(a); ++ unsigned short c = __builtin_loongarch_iocsrrd_h(a); ++ return b+c; ++} +diff --git a/clang/test/CodeGen/LoongArch/intrinsic-la64.c b/clang/test/CodeGen/LoongArch/intrinsic-la64.c +index a740882eef54..48b6a7a3d227 100644 +--- a/clang/test/CodeGen/LoongArch/intrinsic-la64.c ++++ b/clang/test/CodeGen/LoongArch/intrinsic-la64.c +@@ -387,7 +387,7 @@ unsigned int cpucfg(unsigned int a) { + + // CHECK-LABEL: @rdtime_d( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call { i64, i64 } asm sideeffect "rdtime.d $0, $1\0A\09", "=&r,=&r"() #ATTR1:0-9+, !srcloc !2 ++// CHECK-NEXT: TMP0:%.* = tail call { i64, i64 } asm sideeffect "rdtime.d $0, $1\0A\09", "=&r,=&r"() #ATTR1:0-9+, !srcloc META2:!0-9+ + // CHECK-NEXT: ret void + // + void rdtime_d() { +@@ -396,8 +396,8 @@ void rdtime_d() { + + // CHECK-LABEL: @rdtime( + // CHECK-NEXT: entry: +-// CHECK-NEXT: TMP0:%.* = tail call { i32, i32 } asm sideeffect "rdtimeh.w $0, $1\0A\09", "=&r,=&r"() #ATTR1, !srcloc !3 +-// CHECK-NEXT: TMP1:%.* = tail call { i32, i32 } asm sideeffect "rdtimel.w $0, $1\0A\09", "=&r,=&r"() #ATTR1, !srcloc !4 ++// 
CHECK-NEXT: TMP0:%.* = tail call { i32, i32 } asm sideeffect "rdtimeh.w $0, $1\0A\09", "=&r,=&r"() #ATTR1, !srcloc META3:!0-9+ ++// CHECK-NEXT: TMP1:%.* = tail call { i32, i32 } asm sideeffect "rdtimel.w $0, $1\0A\09", "=&r,=&r"() #ATTR1, !srcloc META4:!0-9+ + // CHECK-NEXT: ret void + // + void rdtime() { +@@ -427,3 +427,18 @@ void loongarch_movgr2fcsr(int a) { + __movgr2fcsr(1, a); + __builtin_loongarch_movgr2fcsr(1, a); + } ++ ++// CHECK-LABEL: @iocsrrd_h_result( ++// CHECK-NEXT: entry: ++// CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.iocsrrd.h(i32 A:%.*) ++// CHECK-NEXT: TMP1:%.* = tail call i32 @llvm.loongarch.iocsrrd.h(i32 A) ++// CHECK-NEXT: CONV2:%.* = and i32 TMP0, 255 ++// CHECK-NEXT: ADD:%.* = add i32 TMP1, CONV2 ++// CHECK-NEXT: CONV4:%.* = trunc i32 ADD to i16 ++// CHECK-NEXT: ret i16 CONV4 ++// ++unsigned short iocsrrd_h_result(unsigned int a) { ++ unsigned short b = __iocsrrd_h(a); ++ unsigned short c = __builtin_loongarch_iocsrrd_h(a); ++ return b+c; ++} +-- +2.20.1 + + +From 47425dfdd1582ec652aba1c289f3a80fe25c1a8c Mon Sep 17 00:00:00 2001 +From: wanglei <wanglei@loongson.cn> +Date: Wed, 6 Mar 2024 10:03:28 +0800 +Subject: PATCH 9/9 ClangLoongArch Fix wrong return value type of + __iocsrrd_h (#84100) + +relate: +https: //gcc.gnu.org/pipermail/gcc-patches/2024-February/645016.html +(cherry picked from commit 2f479b811274fede36535e34ecb545ac22e399c3) +(cherry picked from commit 9b9aee16d4dcf1b4af49988ebd7918fa4ce77e44) +--- + clang/lib/Headers/larchintrin.h | 2 +- + clang/test/CodeGen/LoongArch/intrinsic-la32.c | 8 ++++---- + clang/test/CodeGen/LoongArch/intrinsic-la64.c | 8 ++++---- + 3 files changed, 9 insertions(+), 9 deletions(-) + +diff --git a/clang/lib/Headers/larchintrin.h b/clang/lib/Headers/larchintrin.h +index c5c533ee0b8c..24dd29ce91ff 100644 +--- a/clang/lib/Headers/larchintrin.h ++++ b/clang/lib/Headers/larchintrin.h +@@ -156,7 +156,7 @@ extern __inline unsigned char + return (unsigned char)__builtin_loongarch_iocsrrd_b((unsigned int)_1); + } + +-extern __inline unsigned char ++extern __inline unsigned short + __attribute__((__gnu_inline__, __always_inline__, __artificial__)) + __iocsrrd_h(unsigned int _1) { + return (unsigned short)__builtin_loongarch_iocsrrd_h((unsigned int)_1); +diff --git a/clang/test/CodeGen/LoongArch/intrinsic-la32.c b/clang/test/CodeGen/LoongArch/intrinsic-la32.c +index 6a8d99880be3..eb3f8cbe7ac4 100644 +--- a/clang/test/CodeGen/LoongArch/intrinsic-la32.c ++++ b/clang/test/CodeGen/LoongArch/intrinsic-la32.c +@@ -215,11 +215,11 @@ void cacop_w(unsigned long int a) { + // LA32-LABEL: @iocsrrd_h_result( + // LA32-NEXT: entry: + // LA32-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.iocsrrd.h(i32 A:%.*) ++// LA32-NEXT: CONV_I:%.* = trunc i32 TMP0 to i16 + // LA32-NEXT: TMP1:%.* = tail call i32 @llvm.loongarch.iocsrrd.h(i32 A) +-// LA32-NEXT: CONV2:%.* = and i32 TMP0, 255 +-// LA32-NEXT: ADD:%.* = add i32 TMP1, CONV2 +-// LA32-NEXT: CONV4:%.* = trunc i32 ADD to i16 +-// LA32-NEXT: ret i16 CONV4 ++// LA32-NEXT: TMP2:%.* = trunc i32 TMP1 to i16 ++// LA32-NEXT: CONV3:%.* = add i16 TMP2, CONV_I ++// LA32-NEXT: ret i16 CONV3 + // + unsigned short iocsrrd_h_result(unsigned int a) { + unsigned short b = __iocsrrd_h(a); +diff --git a/clang/test/CodeGen/LoongArch/intrinsic-la64.c b/clang/test/CodeGen/LoongArch/intrinsic-la64.c +index 48b6a7a3d227..50ec358f546e 100644 +--- a/clang/test/CodeGen/LoongArch/intrinsic-la64.c ++++ b/clang/test/CodeGen/LoongArch/intrinsic-la64.c +@@ -431,11 +431,11 @@ void loongarch_movgr2fcsr(int a) { + // 
CHECK-LABEL: @iocsrrd_h_result( + // CHECK-NEXT: entry: + // CHECK-NEXT: TMP0:%.* = tail call i32 @llvm.loongarch.iocsrrd.h(i32 A:%.*) ++// CHECK-NEXT: CONV_I:%.* = trunc i32 TMP0 to i16 + // CHECK-NEXT: TMP1:%.* = tail call i32 @llvm.loongarch.iocsrrd.h(i32 A) +-// CHECK-NEXT: CONV2:%.* = and i32 TMP0, 255 +-// CHECK-NEXT: ADD:%.* = add i32 TMP1, CONV2 +-// CHECK-NEXT: CONV4:%.* = trunc i32 ADD to i16 +-// CHECK-NEXT: ret i16 CONV4 ++// CHECK-NEXT: TMP2:%.* = trunc i32 TMP1 to i16 ++// CHECK-NEXT: CONV3:%.* = add i16 TMP2, CONV_I ++// CHECK-NEXT: ret i16 CONV3 + // + unsigned short iocsrrd_h_result(unsigned int a) { + unsigned short b = __iocsrrd_h(a); +-- +2.20.1 +
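Patches 8/9 and 9/9 above center on the return type of __iocsrrd_h: larchintrin.h previously declared the wrapper as returning unsigned char, so the upper byte of the 16-bit IOCSR read was masked off (the `and i32 ..., 255` in the old CHECK lines), whereas the fixed header returns unsigned short and only truncates to i16. A minimal usage sketch, not part of the packaged sources; the function name and the idea of wrapping the intrinsic are illustrative only.

#include <larchintrin.h>

// Reads one halfword from I/O CSR space. With the corrected header the full
// 16-bit value comes back; before the fix it was narrowed through unsigned
// char first, losing the high byte.
unsigned short read_iocsr_halfword(unsigned int addr) {
  return __iocsrrd_h(addr);
}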
View file
_service:tar_scm:0010-Backport-clang-Avoid-Wshadow-warning-when-init-capture-named.patch
Added
@@ -0,0 +1,255 @@ +From 6804a15df7959a14b97d41dd8c069d2d695392e4 Mon Sep 17 00:00:00 2001 +From: Mariya Podchishchaeva <mariya.podchishchaeva@intel.com> +Date: Mon, 12 Feb 2024 12:44:20 +0300 +Subject: PATCH clang Avoid -Wshadow warning when init-capture named same + as class field (#74512) + +Shadowing warning doesn't make much sense since field is not available +in lambda's body without capturing this. + +Fixes https://github.com/llvm/llvm-project/issues/71976 + +(cherry picked from commit c13b7485b87909fcf739f62cfa382b55407433c0) +--- + clang/docs/ReleaseNotes.rst | 3 + + clang/include/clang/Sema/ScopeInfo.h | 4 +- + clang/lib/Sema/SemaDecl.cpp | 73 +++++++++------ + clang/test/SemaCXX/warn-shadow-in-lambdas.cpp | 92 ++++++++++++++++++- + 4 files changed, 141 insertions(+), 31 deletions(-) + +diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst +index a1143e14562e..5086a56eb496 100644 +--- a/clang/docs/ReleaseNotes.rst ++++ b/clang/docs/ReleaseNotes.rst +@@ -730,6 +730,9 @@ Bug Fixes in This Version + ``thread_local`` instead of ``_Thread_local``. + Fixes (`#70068 <https://github.com/llvm/llvm-project/issues/70068>`_) and + (`#69167 <https://github.com/llvm/llvm-project/issues/69167>`_) ++- Clang's ``-Wshadow`` no longer warns when an init-capture is named the same as ++ a class field unless the lambda can capture this. ++ Fixes (`#71976 <https://github.com/llvm/llvm-project/issues/71976>`_) + + Bug Fixes to Compiler Builtins + ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +diff --git a/clang/include/clang/Sema/ScopeInfo.h b/clang/include/clang/Sema/ScopeInfo.h +index 26c0387dfc44..f4e1dba4e20d 100644 +--- a/clang/include/clang/Sema/ScopeInfo.h ++++ b/clang/include/clang/Sema/ScopeInfo.h +@@ -915,8 +915,8 @@ public: + /// that were defined in parent contexts. Used to avoid warnings when the + /// shadowed variables are uncaptured by this lambda. + struct ShadowedOuterDecl { +- const VarDecl *VD; +- const VarDecl *ShadowedDecl; ++ const NamedDecl *VD; ++ const NamedDecl *ShadowedDecl; + }; + llvm::SmallVector<ShadowedOuterDecl, 4> ShadowingDecls; + +diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp +index 21b5781a71cd..5481bbd22c66 100644 +--- a/clang/lib/Sema/SemaDecl.cpp ++++ b/clang/lib/Sema/SemaDecl.cpp +@@ -8269,28 +8269,40 @@ void Sema::CheckShadow(NamedDecl *D, NamedDecl *ShadowedDecl, + + unsigned WarningDiag = diag::warn_decl_shadow; + SourceLocation CaptureLoc; +- if (isa<VarDecl>(D) && isa<VarDecl>(ShadowedDecl) && NewDC && +- isa<CXXMethodDecl>(NewDC)) { ++ if (isa<VarDecl>(D) && NewDC && isa<CXXMethodDecl>(NewDC)) { + if (const auto *RD = dyn_cast<CXXRecordDecl>(NewDC->getParent())) { + if (RD->isLambda() && OldDC->Encloses(NewDC->getLexicalParent())) { +- if (RD->getLambdaCaptureDefault() == LCD_None) { +- // Try to avoid warnings for lambdas with an explicit capture list. ++ if (const auto *VD = dyn_cast<VarDecl>(ShadowedDecl)) { + const auto *LSI = cast<LambdaScopeInfo>(getCurFunction()); +- // Warn only when the lambda captures the shadowed decl explicitly. +- CaptureLoc = getCaptureLocation(LSI, cast<VarDecl>(ShadowedDecl)); +- if (CaptureLoc.isInvalid()) +- WarningDiag = diag::warn_decl_shadow_uncaptured_local; +- } else { +- // Remember that this was shadowed so we can avoid the warning if the +- // shadowed decl isn't captured and the warning settings allow it. ++ if (RD->getLambdaCaptureDefault() == LCD_None) { ++ // Try to avoid warnings for lambdas with an explicit capture ++ // list. 
Warn only when the lambda captures the shadowed decl ++ // explicitly. ++ CaptureLoc = getCaptureLocation(LSI, VD); ++ if (CaptureLoc.isInvalid()) ++ WarningDiag = diag::warn_decl_shadow_uncaptured_local; ++ } else { ++ // Remember that this was shadowed so we can avoid the warning if ++ // the shadowed decl isn't captured and the warning settings allow ++ // it. ++ cast<LambdaScopeInfo>(getCurFunction()) ++ ->ShadowingDecls.push_back({D, VD}); ++ return; ++ } ++ } ++ if (isa<FieldDecl>(ShadowedDecl)) { ++ // If lambda can capture this, then emit default shadowing warning, ++ // Otherwise it is not really a shadowing case since field is not ++ // available in lambda's body. ++ // At this point we don't know that lambda can capture this, so ++ // remember that this was shadowed and delay until we know. + cast<LambdaScopeInfo>(getCurFunction()) +- ->ShadowingDecls.push_back( +- {cast<VarDecl>(D), cast<VarDecl>(ShadowedDecl)}); ++ ->ShadowingDecls.push_back({D, ShadowedDecl}); + return; + } + } +- +- if (cast<VarDecl>(ShadowedDecl)->hasLocalStorage()) { ++ if (const auto *VD = dyn_cast<VarDecl>(ShadowedDecl); ++ VD && VD->hasLocalStorage()) { + // A variable can't shadow a local variable in an enclosing scope, if + // they are separated by a non-capturing declaration context. + for (DeclContext *ParentDC = NewDC; +@@ -8337,19 +8349,28 @@ void Sema::CheckShadow(NamedDecl *D, NamedDecl *ShadowedDecl, + /// when these variables are captured by the lambda. + void Sema::DiagnoseShadowingLambdaDecls(const LambdaScopeInfo *LSI) { + for (const auto &Shadow : LSI->ShadowingDecls) { +- const VarDecl *ShadowedDecl = Shadow.ShadowedDecl; ++ const NamedDecl *ShadowedDecl = Shadow.ShadowedDecl; + // Try to avoid the warning when the shadowed decl isn't captured. +- SourceLocation CaptureLoc = getCaptureLocation(LSI, ShadowedDecl); + const DeclContext *OldDC = ShadowedDecl->getDeclContext(); +- Diag(Shadow.VD->getLocation(), CaptureLoc.isInvalid() +- ? diag::warn_decl_shadow_uncaptured_local +- : diag::warn_decl_shadow) +- << Shadow.VD->getDeclName() +- << computeShadowedDeclKind(ShadowedDecl, OldDC) << OldDC; +- if (!CaptureLoc.isInvalid()) +- Diag(CaptureLoc, diag::note_var_explicitly_captured_here) +- << Shadow.VD->getDeclName() << /*explicitly*/ 0; +- Diag(ShadowedDecl->getLocation(), diag::note_previous_declaration); ++ if (const auto *VD = dyn_cast<VarDecl>(ShadowedDecl)) { ++ SourceLocation CaptureLoc = getCaptureLocation(LSI, VD); ++ Diag(Shadow.VD->getLocation(), ++ CaptureLoc.isInvalid() ? diag::warn_decl_shadow_uncaptured_local ++ : diag::warn_decl_shadow) ++ << Shadow.VD->getDeclName() ++ << computeShadowedDeclKind(ShadowedDecl, OldDC) << OldDC; ++ if (CaptureLoc.isValid()) ++ Diag(CaptureLoc, diag::note_var_explicitly_captured_here) ++ << Shadow.VD->getDeclName() << /*explicitly*/ 0; ++ Diag(ShadowedDecl->getLocation(), diag::note_previous_declaration); ++ } else if (isa<FieldDecl>(ShadowedDecl)) { ++ Diag(Shadow.VD->getLocation(), ++ LSI->isCXXThisCaptured() ? 
diag::warn_decl_shadow ++ : diag::warn_decl_shadow_uncaptured_local) ++ << Shadow.VD->getDeclName() ++ << computeShadowedDeclKind(ShadowedDecl, OldDC) << OldDC; ++ Diag(ShadowedDecl->getLocation(), diag::note_previous_declaration); ++ } + } + } + +diff --git a/clang/test/SemaCXX/warn-shadow-in-lambdas.cpp b/clang/test/SemaCXX/warn-shadow-in-lambdas.cpp +index bda6a65c0216..d54b394df4eb 100644 +--- a/clang/test/SemaCXX/warn-shadow-in-lambdas.cpp ++++ b/clang/test/SemaCXX/warn-shadow-in-lambdas.cpp +@@ -1,6 +1,6 @@ +-// RUN: %clang_cc1 -std=c++14 -verify -fsyntax-only -Wshadow -D AVOID %s +-// RUN: %clang_cc1 -std=c++14 -verify -fsyntax-only -Wshadow -Wshadow-uncaptured-local %s +-// RUN: %clang_cc1 -std=c++14 -verify -fsyntax-only -Wshadow-all %s ++// RUN: %clang_cc1 -std=c++14 -verify=expected,cxx14 -fsyntax-only -Wshadow -D AVOID %s ++// RUN: %clang_cc1 -std=c++14 -verify=expected,cxx14 -fsyntax-only -Wshadow -Wshadow-uncaptured-local %s ++// RUN: %clang_cc1 -std=c++14 -verify=expected,cxx14 -fsyntax-only -Wshadow-all %s + // RUN: %clang_cc1 -std=c++17 -verify -fsyntax-only -Wshadow-all %s + // RUN: %clang_cc1 -std=c++20 -verify -fsyntax-only -Wshadow-all %s + +@@ -179,3 +179,89 @@ void f() { + #endif + } + } ++ ++namespace GH71976 { ++#ifdef AVOID ++struct A { ++ int b = 5; ++ int foo() { ++ return b = b() { return b; }(); // no -Wshadow diagnostic, init-capture does not shadow b due to not capturing this ++ } ++}; ++ ++struct B { ++ int a; ++ void foo() { ++ auto b = a = this->a {}; // no -Wshadow diagnostic, init-capture does not shadow a due to not capturing his ++ } ++}; ++ ++struct C { ++ int b = 5; ++ int foo() { ++ return a = b() { ++ return =, b = a() { // no -Wshadow diagnostic, init-capture does not shadow b due to outer lambda ++ return b; ++ }(); ++ }(); ++ } ++}; ++ ++#else ++struct A { ++ int b = 5; // expected-note {{previous}} ++ int foo() { ++ return b = b() { return b; }(); // expected-warning {{declaration shadows a field}} ++ } ++}; ++ ++struct B { ++ int a; // expected-note {{previous}} ++ void foo() { ++ auto b = a = this->a {}; // expected-warning {{declaration shadows a field}} ++ } ++}; ++ ++struct C { ++ int b = 5; // expected-note {{previous}} ++ int foo() { ++ return a = b() { ++ return =, b = a() { // expected-warning {{declaration shadows a field}} ++ return b; ++ }(); ++ }(); ++ } ++}; ++ ++struct D { ++ int b = 5; // expected-note {{previous}} ++ int foo() { ++ return b = b, this() { return b; }(); // expected-warning {{declaration shadows a field}} ++ } ++}; ++ ++struct E { ++ int b = 5; ++ int foo() { ++ return a = b() { // expected-note {{previous}} ++ return =, a = a() { // expected-warning {{shadows a local}} ++ return a; ++ }(); ++ }(); ++ } ++}; ++ ++#endif ++ ++struct S { ++ int a ; ++}; ++ ++int foo() { ++ auto a = S{0}; // expected-note {{previous}} \ ++ // cxx14-warning {{decomposition declarations are a C++17 extension}} ++ a = a () { // expected-warning {{declaration shadows a structured binding}} ++ }(); ++} ++ ++} +-- +2.33.0 +
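The backport above changes when -Wshadow fires for an init-capture that reuses a class field's name: plain -Wshadow now stays quiet unless the lambda can capture `this`, while -Wshadow-uncaptured-local still flags the uncaptured case. A short sketch of the two situations; the struct and member names here are made up rather than taken from the patch.

struct Widget {
  int n = 5;
  int without_this() {
    // After the backport, plain -Wshadow is silent here: `this` is not
    // captured, so the field is not reachable inside the lambda body anyway.
    return [n = n]() { return n; }();
  }
  int with_this() {
    // Still warns under -Wshadow: capturing `this` makes the field visible,
    // so the init-capture genuinely shadows it.
    return [n = n, this]() { return n; }();
  }
};

Compiled with -std=c++17 -Wshadow, only with_this() should produce the "declaration shadows a field" diagnostic.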
View file
_service:tar_scm:0011-Add-the-support-for-classic-flang.patch
Added
@@ -0,0 +1,4031 @@ +From 3d36d3891f2a654d5ec2a0ce3c2ff47b597d1c42 Mon Sep 17 00:00:00 2001 +From: luofeng14 <luofeng13@huawei.com> +Date: Mon, 11 Mar 2024 21:25:03 +0800 +Subject: PATCH sync classic flang patch + +--- + clang/CMakeLists.txt | 5 + + clang/include/clang/Basic/CodeGenOptions.h | 3 + + .../clang/Basic/DiagnosticDriverKinds.td | 6 + + clang/include/clang/Basic/MacroBuilder.h | 3 +- + clang/include/clang/Basic/Sanitizers.def | 2 + + clang/include/clang/Driver/CMakeLists.txt | 13 +- + clang/include/clang/Driver/Options.td | 197 ++- + clang/include/clang/Driver/ToolChain.h | 17 + + clang/include/clang/Driver/Types.def | 7 + + clang/include/clang/Driver/Types.h | 8 + + clang/include/clang/Frontend/Utils.h | 32 + + clang/lib/CodeGen/BackendUtil.cpp | 6 + + clang/lib/CodeGen/CGDebugInfo.cpp | 10 +- + clang/lib/Driver/CMakeLists.txt | 8 +- + clang/lib/Driver/Driver.cpp | 60 +- + clang/lib/Driver/OffloadBundler.cpp | 4 + + clang/lib/Driver/ToolChain.cpp | 36 +- + clang/lib/Driver/ToolChains/Clang.cpp | 60 + + clang/lib/Driver/ToolChains/ClassicFlang.cpp | 1217 +++++++++++++++++ + clang/lib/Driver/ToolChains/ClassicFlang.h | 49 + + clang/lib/Driver/ToolChains/CommonArgs.cpp | 69 +- + clang/lib/Driver/ToolChains/CommonArgs.h | 20 + + clang/lib/Driver/ToolChains/Cuda.cpp | 35 + + clang/lib/Driver/ToolChains/Cuda.h | 5 + + clang/lib/Driver/ToolChains/Darwin.cpp | 4 + + clang/lib/Driver/ToolChains/Gnu.cpp | 4 + + clang/lib/Driver/ToolChains/Linux.cpp | 190 +++ + clang/lib/Driver/ToolChains/Linux.h | 5 + + clang/lib/Driver/ToolChains/MSVC.cpp | 115 ++ + clang/lib/Driver/ToolChains/MSVC.h | 11 + + clang/lib/Driver/ToolChains/MinGW.cpp | 4 + + clang/lib/Driver/Types.cpp | 50 + + clang/lib/Frontend/InitPreprocessor.cpp | 20 - + clang/test/CMakeLists.txt | 1 + + .../test/CodeGen/libpgmath-logfun-aarch64.ll | 58 + + clang/test/CodeGen/libpgmath-logfun-x86_64.ll | 57 + + clang/test/Driver/autocomplete.c | 2 +- + clang/test/Driver/emit-flang-attrs.f90 | 58 + + .../test/Driver/flang/Inputs/llvm-ir-input.ll | 0 + .../flang/classic-flang-emit-flang-llvm.f95 | 10 + + .../flang/classic-flang-fp-contract.f95 | 27 + + .../flang/classic-flang-must-preprocess.F | 12 + + .../flang/classic-flang-must-preprocess.F95 | 12 + + .../test/Driver/flang/classic-flang-version.f | 3 + + .../flang/classic-flang-vscale-mbits.f95 | 28 + + .../Driver/flang/classic-flang-vscale.f95 | 28 + + clang/test/Driver/flang/classic-flang.f | 26 + + clang/test/Driver/flang/classic-flang.f95 | 120 ++ + clang/test/Driver/flang/flang.f90 | 2 + + clang/test/Driver/flang/flang_ucase.F90 | 2 + + clang/test/Driver/flang/llvm-ir-input.f | 7 + + .../Driver/flang/multiple-inputs-mixed.f90 | 2 + + clang/test/Driver/flang/multiple-inputs.f90 | 2 + + clang/test/Driver/flang/reassoc.f90 | 59 + + clang/test/Driver/fortran-phases.f90 | 119 ++ + clang/test/Driver/fortran-preprocessor.f90 | 48 + + clang/test/Driver/fortran.f95 | 2 + + clang/test/Driver/gfortran.f90 | 1 + + clang/test/Driver/lit.local.cfg | 3 + + clang/test/lit.cfg.py | 3 + + clang/test/lit.site.cfg.py.in | 1 + + clang/tools/driver/CMakeLists.txt | 2 +- + 62 files changed, 2933 insertions(+), 37 deletions(-) + create mode 100644 clang/lib/Driver/ToolChains/ClassicFlang.cpp + create mode 100644 clang/lib/Driver/ToolChains/ClassicFlang.h + mode change 100644 => 100755 clang/lib/Driver/Types.cpp + create mode 100644 clang/test/CodeGen/libpgmath-logfun-aarch64.ll + create mode 100644 clang/test/CodeGen/libpgmath-logfun-x86_64.ll + create mode 100644 clang/test/Driver/emit-flang-attrs.f90 + 
create mode 100644 clang/test/Driver/flang/Inputs/llvm-ir-input.ll + create mode 100644 clang/test/Driver/flang/classic-flang-emit-flang-llvm.f95 + create mode 100644 clang/test/Driver/flang/classic-flang-fp-contract.f95 + create mode 100644 clang/test/Driver/flang/classic-flang-must-preprocess.F + create mode 100644 clang/test/Driver/flang/classic-flang-must-preprocess.F95 + create mode 100644 clang/test/Driver/flang/classic-flang-version.f + create mode 100644 clang/test/Driver/flang/classic-flang-vscale-mbits.f95 + create mode 100644 clang/test/Driver/flang/classic-flang-vscale.f95 + create mode 100644 clang/test/Driver/flang/classic-flang.f + create mode 100644 clang/test/Driver/flang/classic-flang.f95 + create mode 100644 clang/test/Driver/flang/llvm-ir-input.f + create mode 100644 clang/test/Driver/flang/reassoc.f90 + create mode 100644 clang/test/Driver/fortran-phases.f90 + create mode 100644 clang/test/Driver/fortran-preprocessor.f90 + +diff --git a/clang/CMakeLists.txt b/clang/CMakeLists.txt +index d558b0522e82..949f12d3ce8c 100644 +--- a/clang/CMakeLists.txt ++++ b/clang/CMakeLists.txt +@@ -392,6 +392,11 @@ if (NOT LLVM_INSTALL_TOOLCHAIN_ONLY) + endif() + endif() + ++option(LLVM_ENABLE_CLASSIC_FLANG "Build support for classic Flang instead of the new built-in Flang" OFF) ++if(LLVM_ENABLE_CLASSIC_FLANG) ++ add_definitions( -DENABLE_CLASSIC_FLANG ) ++endif() ++ + option(CLANG_BUILD_TOOLS + "Build the Clang tools. If OFF, just generate build targets." ON) + +diff --git a/clang/include/clang/Basic/CodeGenOptions.h b/clang/include/clang/Basic/CodeGenOptions.h +index 14fc94fe27f9..41577b97e030 100644 +--- a/clang/include/clang/Basic/CodeGenOptions.h ++++ b/clang/include/clang/Basic/CodeGenOptions.h +@@ -59,6 +59,9 @@ public: + Accelerate, // Use the Accelerate framework. + LIBMVEC, // GLIBC vector math library. + MASSV, // IBM MASS vector library. ++#ifdef ENABLE_CLASSIC_FLANG ++ PGMATH, // PGI math library. ++#endif + SVML, // Intel short vector math library. + SLEEF, // SLEEF SIMD Library for Evaluating Elementary Functions. + Darwin_libsystem_m, // Use Darwin's libsytem_m vector functions. 
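The PGMATH value added to the vector-library enum above corresponds to Classic Flang's libpgmath math runtime; later hunks in this patch make the driver pass -fveclib=PGMATH by default in Flang mode and link -lpgmath. As a rough, standalone illustration of that kind of enum-to-runtime mapping (the enum and helper below are simplified, hypothetical stand-ins, not code from the patch or from clang):

// Illustration only, not part of the patch: a simplified sketch of how a
// -fveclib choice such as the new PGMATH value maps to a math runtime.
#include <string>

enum class VecLib { Accelerate, LIBMVEC, MASSV, PGMATH, SVML, SLEEF, NoLibrary };

// Hypothetical helper: pick the math runtime associated with the selection.
static std::string veclibRuntime(VecLib Lib) {
  switch (Lib) {
  case VecLib::PGMATH:  return "libpgmath"; // Classic Flang's vector math runtime
  case VecLib::LIBMVEC: return "libmvec";   // glibc vector math
  case VecLib::SVML:    return "svml";
  case VecLib::SLEEF:   return "sleef";
  default:              return "";          // no vector math library selected
  }
}

int main() { return veclibRuntime(VecLib::PGMATH) == "libpgmath" ? 0 : 1; }

Clang's real handling goes through CodeGenOptions and TargetLibraryInfoImpl, as the BackendUtil.cpp and ToolChain.cpp hunks further down show.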
+diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td +index 1b69324d073a..09a1949d7596 100644 +--- a/clang/include/clang/Basic/DiagnosticDriverKinds.td ++++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td +@@ -131,6 +131,10 @@ def err_drv_invalid_linker_name : Error< + "invalid linker name in argument '%0'">; + def err_drv_invalid_rtlib_name : Error< + "invalid runtime library name in argument '%0'">; ++def err_drv_invalid_allocatable_mode : Error< ++ "invalid semantic mode for assignments to allocatables in argument '%0'">; ++def err_drv_unsupported_fixed_line_length : Error< ++ "unsupported fixed-format line length in argument '%0'">; + def err_drv_unsupported_rtlib_for_platform : Error< + "unsupported runtime library '%0' for platform '%1'">; + def err_drv_invalid_unwindlib_name : Error< +@@ -354,6 +358,8 @@ def err_drv_negative_columns : Error< + "invalid value '%1' in '%0', value must be 'none' or a positive integer">; + def err_drv_small_columns : Error< + "invalid value '%1' in '%0', value must be '%2' or greater">; ++def err_drv_clang_unsupported_minfo_arg : Error< ++ "'%0' option does not support '%1' value">; + + def err_drv_invalid_malign_branch_EQ : Error< + "invalid argument '%0' to -malign-branch=; each element must be one of: %1">; +diff --git a/clang/include/clang/Basic/MacroBuilder.h b/clang/include/clang/Basic/MacroBuilder.h +index 96e67cbbfa3f..bfc5e38c15e3 100644 +--- a/clang/include/clang/Basic/MacroBuilder.h ++++ b/clang/include/clang/Basic/MacroBuilder.h +@@ -24,9 +24,10 @@ class MacroBuilder { + raw_ostream &Out; + public: + MacroBuilder(raw_ostream &Output) : Out(Output) {} ++ virtual ~MacroBuilder() {} + + /// Append a \#define line for macro of the form "\#define Name Value\n". 
+- void defineMacro(const Twine &Name, const Twine &Value = "1") { ++ virtual void defineMacro(const Twine &Name, const Twine &Value = "1") { + Out << "#define " << Name << ' ' << Value << '\n'; + } + +diff --git a/clang/include/clang/Basic/Sanitizers.def b/clang/include/clang/Basic/Sanitizers.def +index c2137e3f61f6..4f0a2bf332ef 100644 +--- a/clang/include/clang/Basic/Sanitizers.def ++++ b/clang/include/clang/Basic/Sanitizers.def +@@ -107,6 +107,8 @@ SANITIZER("signed-integer-overflow", SignedIntegerOverflow) + SANITIZER("unreachable", Unreachable) + SANITIZER("vla-bound", VLABound) + SANITIZER("vptr", Vptr) ++// fortran contiguous pointer checks ++SANITIZER("discontiguous", Discontiguous) + + // IntegerSanitizer + SANITIZER("unsigned-integer-overflow", UnsignedIntegerOverflow) +diff --git a/clang/include/clang/Driver/CMakeLists.txt b/clang/include/clang/Driver/CMakeLists.txt +index ea55ba0f1f27..8c0af1528a96 100644 +--- a/clang/include/clang/Driver/CMakeLists.txt ++++ b/clang/include/clang/Driver/CMakeLists.txt +@@ -1,7 +1,14 @@ + set(LLVM_TARGET_DEFINITIONS Options.td) ++ ++set(CLANG_DRIVER_OPTIONS) + if (BUILD_FOR_OPENEULER) +-tablegen(LLVM Options.inc -gen-opt-parser-defs -DBUILD_FOR_OPENEULER) +-else() +-tablegen(LLVM Options.inc -gen-opt-parser-defs) ++ list(APPEND CLANG_DRIVER_OPTIONS -DBUILD_FOR_OPENEULER ) + endif() ++ ++if (LLVM_ENABLE_CLASSIC_FLANG) ++ list(APPEND CLANG_DRIVER_OPTIONS -DENABLE_CLASSIC_FLANG ) ++endif() ++ ++tablegen(LLVM Options.inc ${CLANG_DRIVER_OPTIONS} -gen-opt-parser-defs ) ++ + add_public_tablegen_target(ClangDriverOptions) +diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td +index d4f7315bf8cb..365285966a67 100644 +--- a/clang/include/clang/Driver/Options.td ++++ b/clang/include/clang/Driver/Options.td +@@ -590,7 +590,11 @@ class InternalDriverOpt : Group<internal_driver_Group>, + Flags<NoXarchOption, HelpHidden>; + def driver_mode : Joined<"--", "driver-mode=">, Group<internal_driver_Group>, + Flags<CoreOption, NoXarchOption, HelpHidden>, ++#ifdef ENABLE_CLASSIC_FLANG ++ HelpText<"Set the driver mode to one of: 'gcc', 'g++', 'cpp', 'cl', or 'flang'">; ++#else + HelpText<"Set the driver mode to either 'gcc', 'g++', 'cpp', or 'cl'">; ++#endif + def rsp_quoting : Joined<"--", "rsp-quoting=">, Group<internal_driver_Group>, + Flags<CoreOption, NoXarchOption, HelpHidden>, + HelpText<"Set the rsp quoting to either 'posix', or 'windows'">; +@@ -1183,6 +1187,10 @@ def d_Flag : Flag<"-", "d">, Group<d_Group>; + def d_Joined : Joined<"-", "d">, Group<d_Group>; + def emit_ast : Flag<"-", "emit-ast">, Flags<CoreOption>, + HelpText<"Emit Clang AST files for source inputs">; ++#ifdef ENABLE_CLASSIC_FLANG ++def emit_flang_llvm : Flag<"-", "emit-flang-llvm">, ++ HelpText<"Emit Flang LLVM files for source inputs">; ++#endif + def emit_llvm : Flag<"-", "emit-llvm">, Flags<CC1Option, FC1Option, FlangOption>, Group<Action_Group>, + HelpText<"Use the LLVM representation for assembler and object files">; + def emit_interface_stubs : Flag<"-", "emit-interface-stubs">, Flags<CC1Option>, Group<Action_Group>, +@@ -2640,10 +2648,17 @@ def fno_experimental_isel : Flag<"-", "fno-experimental-isel">, Group<f_clang_ + Alias<fno_global_isel>; + def fveclib : Joined<"-", "fveclib=">, Group<f_Group>, Flags<CC1Option>, + HelpText<"Use the given vector functions library">, ++#ifdef ENABLE_CLASSIC_FLANG ++ Values<"Accelerate,libmvec,MASSV,PGMATH,SVML,SLEEF,Darwin_libsystem_m,ArmPL,none">, ++ NormalizedValuesScope<"CodeGenOptions">, ++ 
NormalizedValues<"Accelerate", "LIBMVEC", "MASSV", "PGMATH", "SVML", "SLEEF", ++ "Darwin_libsystem_m", "ArmPL", "NoLibrary">, ++#else + Values<"Accelerate,libmvec,MASSV,SVML,SLEEF,Darwin_libsystem_m,ArmPL,none">, + NormalizedValuesScope<"CodeGenOptions">, + NormalizedValues<"Accelerate", "LIBMVEC", "MASSV", "SVML", "SLEEF", + "Darwin_libsystem_m", "ArmPL", "NoLibrary">, ++#endif + MarshallingInfoEnum<CodeGenOpts<"VecLib">, "NoLibrary">; + def fno_lax_vector_conversions : Flag<"-", "fno-lax-vector-conversions">, Group<f_Group>, + Alias<flax_vector_conversions_EQ>, AliasArgs<"none">; +@@ -5382,6 +5397,20 @@ def module_dir : JoinedOrSeparate<"-", "module-dir">, MetaVarName<"<dir>">, + It is also added to the list of directories to be searched by an USE statement. + The default is the current directory.}>; + ++#ifdef ENABLE_CLASSIC_FLANG ++// Define a group for Fortran source format options. ++def fortran_format_Group : OptionGroup<"Fortran format Group">, Group<gfortran_Group>; ++def ffixed_form : Flag<"-", "ffixed-form">, Group<fortran_format_Group>, ++ HelpText<"Process source files in fixed form">; ++def fno_fixed_form : Flag<"-", "fno-fixed-form">, Group<fortran_format_Group>, ++ HelpText<"Disable fixed-form format for Fortran">; ++def ffree_form : Flag<"-", "ffree-form">, Group<fortran_format_Group>, ++ HelpText<"Process source files in free form">; ++def fno_free_form : Flag<"-", "fno-free-form">, Group<fortran_format_Group>, ++ HelpText<"Disable free-form format for Fortran">; ++def ffixed_line_length_VALUE : Joined<"-", "ffixed-line-length-">, Group<f_Group>, ++ HelpText<"Set line length in fixed-form format Fortran, current supporting only 72 and 132 characters">; ++#else + def ffixed_form : Flag<"-", "ffixed-form">, Group<f_Group>, + HelpText<"Process source files in fixed form">; + def ffree_form : Flag<"-", "ffree-form">, Group<f_Group>, +@@ -5391,6 +5420,7 @@ def ffixed_line_length_EQ : Joined<"-", "ffixed-line-length=">, Group<f_Group> + DocBrief<{Set column after which characters are ignored in typical fixed-form lines in the source + file}>; + def ffixed_line_length_VALUE : Joined<"-", "ffixed-line-length-">, Group<f_Group>, Alias<ffixed_line_length_EQ>; ++#endif + def fconvert_EQ : Joined<"-", "fconvert=">, Group<f_Group>, + HelpText<"Set endian conversion of data for unformatted files">; + def fopenacc : Flag<"-", "fopenacc">, Group<f_Group>, +@@ -5399,8 +5429,14 @@ def fdefault_double_8 : Flag<"-","fdefault-double-8">, Group<f_Group>, + HelpText<"Set the default double precision kind to an 8 byte wide type">; + def fdefault_integer_8 : Flag<"-","fdefault-integer-8">, Group<f_Group>, + HelpText<"Set the default integer and logical kind to an 8 byte wide type">; ++#ifdef ENABLE_CLASSIC_FLANG ++def fno_default_integer_8 : Flag<"-", "fno-default-integer-8">, Group<f_Group>; ++#endif + def fdefault_real_8 : Flag<"-","fdefault-real-8">, Group<f_Group>, + HelpText<"Set the default real kind to an 8 byte wide type">; ++#ifdef ENABLE_CLASSIC_FLANG ++def fno_default_real_8 : Flag<"-", "fno-default-real-8">, Group<f_Group>; ++#endif + def flarge_sizes : Flag<"-","flarge-sizes">, Group<f_Group>, + HelpText<"Use INTEGER(KIND=8) for the result type in size-related intrinsics">; + +@@ -5410,7 +5446,17 @@ def fintrinsic_modules_path : Separate<"-", "fintrinsic-modules-path">, Group + HelpText<"Specify where to find the compiled intrinsic modules">, + DocBrief<{This option specifies the location of pre-compiled intrinsic modules, + if they are not in the default location expected by the 
compiler.}>; +- ++#ifdef ENABLE_CLASSIC_FLANG ++def fbackslash : Flag<"-", "fbackslash">, Group<gfortran_Group>, ++ HelpText<"Specify that backslash in string introduces an escape character">, ++ DocBrief<{Change the interpretation of backslashes in string literals from ++a single backslash character to "C-style" escape characters.}>; ++def fno_backslash : Flag<"-", "fno-backslash">, Group<gfortran_Group>; ++// Add the options -f(no-)implicit-none and -f(no-)automatic for compatibility ++// reason. They are not implemented yet in Classic Flang for now. ++defm implicit_none : BooleanFFlag<"implicit-none">, Group<gfortran_Group>; ++def fno_automatic : Flag<"-", "fno-automatic">, Group<gfortran_Group>; ++#else + defm backslash : OptInFC1FFlag<"backslash", "Specify that backslash in string introduces an escape character">; + defm xor_operator : OptInFC1FFlag<"xor-operator", "Enable .XOR. as a synonym of .NEQV.">; + defm logical_abbreviations : OptInFC1FFlag<"logical-abbreviations", "Enable logical abbreviations">; +@@ -5419,6 +5465,7 @@ defm underscoring : OptInFC1FFlag<"underscoring", "Appends one trailing undersco + + def fno_automatic : Flag<"-", "fno-automatic">, Group<f_Group>, + HelpText<"Implies the SAVE attribute for non-automatic local objects in subprograms unless RECURSIVE">; ++#endif + + defm stack_arrays : BoolOptionWithoutMarshalling<"f", "stack-arrays", + PosFlag<SetTrue, , "Attempt to allocate array temporaries on the stack, no matter their size">, +@@ -5426,12 +5473,19 @@ defm stack_arrays : BoolOptionWithoutMarshalling<"f", "stack-arrays", + defm loop_versioning : BoolOptionWithoutMarshalling<"f", "version-loops-for-stride", + PosFlag<SetTrue, , "Create unit-strided versions of loops">, + NegFlag<SetFalse, , "Do not create unit-strided loops (default)">>; ++ + } // let Flags = FC1Option, FlangOption, FlangOnlyOption + ++#ifdef ENABLE_CLASSIC_FLANG ++def J : JoinedOrSeparate<"-", "J">, ++ Flags<RenderJoined>, ++ Group<gfortran_Group>; ++#else + def J : JoinedOrSeparate<"-", "J">, + Flags<RenderJoined, FlangOption, FC1Option, FlangOnlyOption>, + Group<gfortran_Group>, + Alias<module_dir>; ++#endif + + //===----------------------------------------------------------------------===// + // FC1 Options +@@ -7458,6 +7512,147 @@ def fcgl : DXCFlag<"fcgl">, Alias<emit_pristine_llvm>; + def enable_16bit_types : DXCFlag<"enable-16bit-types">, Alias<fnative_half_type>, + HelpText<"Enable 16-bit types and disable min precision types." + "Available in HLSL 2018 and shader model 6.2.">; ++ ++#ifdef ENABLE_CLASSIC_FLANG ++// Classic Flang options that we recognize in the driver and pass along when ++// invoking flang1/flang2 to compile Fortran code. 
++def flang_rt_Group : OptionGroup<"Flang runtime library Group">; ++def pgi_fortran_Group : OptionGroup<"PGI Fortran compatibility Group">, ++ Flags<HelpHidden>; ++ ++// Classic Flang-specific options ++multiclass BooleanKFlag<string name> { ++ def _on : Flag<"-", "K"#name>; ++ def _off : Flag<"-", "Kno"#name>; ++} ++ ++multiclass BooleanMFlag<string name> { ++ def _on : Flag<"-", "M"#name>; ++ def _off : Flag<"-", "Mno"#name>; ++} ++ ++def Mfixed : Flag<"-", "Mfixed">, Group<fortran_format_Group>, ++ HelpText<"Force fixed-form format Fortran">, ++ Flags<HelpHidden>; ++def Mfree_on: Flag<"-", "Mfree">, Group<fortran_format_Group>, ++ HelpText<"Enable free-form format for Fortran">, ++ Flags<HelpHidden>; ++def Mfree_off: Flag<"-", "Mnofree">, Group<fortran_format_Group>, ++ HelpText<"Disable free-form format for Fortran">, ++ Flags<HelpHidden>; ++def Mfreeform_on: Flag<"-", "Mfreeform">, Group<fortran_format_Group>, ++ HelpText<"Enable free-form format for Fortran">, ++ Flags<HelpHidden>; ++def Mfreeform_off: Flag<"-", "Mnofreeform">, Group<fortran_format_Group>, ++ HelpText<"Disable free-form format for Fortran">, ++ Flags<HelpHidden>; ++ ++def Minfo_EQ : CommaJoined<"-", "Minfo=">, ++ HelpText<"Diagnostic information about successful optimizations">, ++ Values<"all,vect,inline">; ++def Minfoall : Flag<"-", "Minfo">, ++ HelpText<"Diagnostic information about all successful optimizations">; ++def Mneginfo_EQ : CommaJoined<"-", "Mneginfo=">, ++ HelpText<"Diagnostic information about missed optimizations">, ++ Values<"all,vect,inline">; ++def Mneginfoall : Flag<"-", "Mneginfo">, ++ HelpText<"Diagnostic information about all missed optimizations">; ++ ++def Mipa: Joined<"-", "Mipa">, Group<pgi_fortran_Group>; ++def Mstackarrays: Joined<"-", "Mstack_arrays">, Group<pgi_fortran_Group>; ++def pc: JoinedOrSeparate<"-", "pc">, Group<pgi_fortran_Group>; ++def Mfprelaxed: Joined<"-", "Mfprelaxed">, Group<pgi_fortran_Group>; ++def Mnofprelaxed: Joined<"-", "Mnofprelaxed">, Group<pgi_fortran_Group>; ++defm Mstride0: BooleanMFlag<"stride0">, Group<pgi_fortran_Group>; ++defm Mrecursive: BooleanMFlag<"recursive">, Group<pgi_fortran_Group>; ++defm Mreentrant: BooleanMFlag<"reentrant">, Group<pgi_fortran_Group>; ++defm Mbounds: BooleanMFlag<"bounds">, Group<pgi_fortran_Group>; ++def Mdaz_on: Flag<"-", "Mdaz">, Group<pgi_fortran_Group>, ++ HelpText<"Treat denormalized numbers as zero">; ++def Mdaz_off: Flag<"-", "Mnodaz">, Group<pgi_fortran_Group>, ++ HelpText<"Disable treating denormalized numbers as zero">; ++def Kieee_on : Flag<"-", "Kieee">, Group<pgi_fortran_Group>, ++ HelpText<"Enable IEEE division">; ++def Kieee_off : Flag<"-", "Knoieee">, Group<pgi_fortran_Group>, ++ HelpText<"Disable IEEE division">; ++def Mextend : Flag<"-", "Mextend">, Group<pgi_fortran_Group>, ++ HelpText<"Allow lines up to 132 characters in Fortran sources">; ++def Mpreprocess : Flag<"-", "Mpreprocess">, Group<pgi_fortran_Group>, ++ HelpText<"Preprocess Fortran files">; ++def Mstandard: Flag<"-", "Mstandard">, Group<pgi_fortran_Group>, ++ HelpText<"Check Fortran standard conformance">; ++def Mchkptr: Flag<"-", "Mchkptr">, Group<pgi_fortran_Group>; ++def Mwritable_constants: Flag<"-", "Mwritable-constants">, Group<pgi_fortran_Group>, ++ HelpText<"Store constants in the writable data segment">; ++defm Minline: BooleanMFlag<"inline">, Group<pgi_fortran_Group>; ++def fma: Flag<"-", "fma">, Group<pgi_fortran_Group>, ++ HelpText<"Enable generation of FMA instructions">; ++def nofma: Flag<"-", "nofma">, Group<pgi_fortran_Group>, ++ 
HelpText<"Disable generation of FMA instructions">; ++defm Mfma: BooleanMFlag<"fma">, Group<pgi_fortran_Group>, ++ HelpText<"Enable generation of FMA instructions">; ++def mp: Flag<"-", "mp">, Group<pgi_fortran_Group>, ++ HelpText<"Enable OpenMP">; ++def nomp: Flag<"-", "nomp">, Group<pgi_fortran_Group>, ++ HelpText<"Do not link with OpenMP library libomp">; ++def Mflushz_on: Flag<"-", "Mflushz">, Group<pgi_fortran_Group>, ++ HelpText<"Set SSE to flush-to-zero mode">; ++def Mflushz_off: Flag<"-", "Mnoflushz">, Group<pgi_fortran_Group>, ++ HelpText<"Disabling setting SSE to flush-to-zero mode">; ++def Msave_on: Flag<"-", "Msave">, Group<pgi_fortran_Group>, ++ HelpText<"Assume all Fortran variables have SAVE attribute">; ++def Msave_off: Flag<"-", "Mnosave">, Group<pgi_fortran_Group>, ++ HelpText<"Assume no Fortran variables have SAVE attribute">; ++def Mcache_align_on: Flag<"-", "Mcache_align">, Group<pgi_fortran_Group>, ++ HelpText<"Align large objects on cache-line boundaries">; ++def Mcache_align_off: Flag<"-", "Mnocache_align">, Group<pgi_fortran_Group>, ++ HelpText<"Disable aligning large objects on cache-line boundaries">; ++def ModuleDir : Separate<"-", "module">, Group<pgi_fortran_Group>, ++ HelpText<"Fortran module path">; ++def Minform_EQ : Joined<"-", "Minform=">, ++ HelpText<"Set error level of messages to display">; ++def Mallocatable_EQ : Joined<"-", "Mallocatable=">, ++ HelpText<"Select semantics for assignments to allocatables (F03 or F95)">; ++def Mbyteswapio: Flag<"-", "Mbyteswapio">, Group<pgi_fortran_Group>, ++ HelpText<"Swap byte-order for unformatted input/output">; ++def byteswapio: Flag<"-", "byteswapio">, Group<gfortran_Group>, ++ HelpText<"Swap byte-order for unformatted input/output">; ++def Mbackslash: Flag<"-", "Mbackslash">, Group<pgi_fortran_Group>, ++ HelpText<"Treat backslash like any other character in character strings">; ++def Mnobackslash: Flag<"-", "Mnobackslash">, Group<pgi_fortran_Group>, ++ HelpText<"Treat backslash as C-style escape character">; ++def staticFlangLibs: Flag<"-", "static-flang-libs">, Group<flang_rt_Group>, ++ HelpText<"Link using static Flang libraries">; ++def noFlangLibs: Flag<"-", "no-flang-libs">, Group<flang_rt_Group>, ++ HelpText<"Do not link against Flang libraries">; ++def r8: Flag<"-", "r8">, Group<pgi_fortran_Group>, ++ HelpText<"Treat REAL as REAL*8">; ++def i8: Flag<"-", "i8">, Group<pgi_fortran_Group>, ++ HelpText<"Treat INTEGER and LOGICAL as INTEGER*8 and LOGICAL*8">; ++def no_fortran_main: Flag<"-", "fno-fortran-main">, Group<gfortran_Group>, ++ HelpText<"Don't link in Fortran main">; ++def Mnomain: Flag<"-", "Mnomain">, Group<pgi_fortran_Group>, ++ HelpText<"Don't link in Fortran main">; ++def frelaxed_math : Flag<"-", "frelaxed-math">, Group<pgi_fortran_Group>, ++ HelpText<"Use relaxed Math intrinsic functions">; ++def Memit_dwarf_common_blocks_name: Flag<"-", "Memit-dwarf-common-blocks-name">, ++ Group<pgi_fortran_Group>, HelpText<"Emit COMMON blocks name in DWARF">; ++def Munixlogical: Flag<"-", "Munixlogical">, Group<pgi_fortran_Group>, ++ HelpText<"Use unixlogical for all loigical operations">; ++ ++// Flang internal debug options ++def Mx_EQ : Joined<"-", "Mx,">, Group<pgi_fortran_Group>; ++def My_EQ : Joined<"-", "My,">, Group<pgi_fortran_Group>; ++def Hx_EQ : Joined<"-", "Hx,">, Group<pgi_fortran_Group>; ++def Hy_EQ : Joined<"-", "Hy,">, Group<pgi_fortran_Group>; ++def Wm_EQ : Joined<"-", "Wm,">, Group<pgi_fortran_Group>; ++ ++def Mq_EQ : Joined<"-", "Mq,">, Group<pgi_fortran_Group>; ++def Hq_EQ : Joined<"-", 
"Hq,">, Group<pgi_fortran_Group>; ++def Mqq_EQ : Joined<"-", "Mqq,">, Group<pgi_fortran_Group>; ++def Hqq_EQ : Joined<"-", "Hqq,">, Group<pgi_fortran_Group>; ++def Wh_EQ : Joined<"-", "Wh,">, Group<pgi_fortran_Group>; ++#endif + def hlsl_entrypoint : Option<"-", "hlsl-entry", KIND_SEPARATE>, + Group<dxc_Group>, + Flags<CC1Option>, +diff --git a/clang/include/clang/Driver/ToolChain.h b/clang/include/clang/Driver/ToolChain.h +index 2e74507f7126..735e72491488 100644 +--- a/clang/include/clang/Driver/ToolChain.h ++++ b/clang/include/clang/Driver/ToolChain.h +@@ -651,6 +651,16 @@ public: + AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs, + llvm::opt::ArgStringList &CC1Args) const; + ++#ifdef ENABLE_CLASSIC_FLANG ++ /// \brief Add the flang arguments for system include paths. ++ /// ++ /// This routine is responsible for adding the -stdinc argument to ++ /// include headers and module files from standard system header directories. ++ virtual void ++ AddFlangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs, ++ llvm::opt::ArgStringList &Flang1Args) const { } ++#endif ++ + /// Add options that need to be passed to cc1 for this target. + virtual void addClangTargetOptions(const llvm::opt::ArgList &DriverArgs, + llvm::opt::ArgStringList &CC1Args, +@@ -757,6 +767,13 @@ public: + virtual void AddHIPRuntimeLibArgs(const llvm::opt::ArgList &Args, + llvm::opt::ArgStringList &CmdArgs) const {} + ++#ifdef ENABLE_CLASSIC_FLANG ++ /// AddFortranStdlibLibArgs - Add the system specific linker arguments to use ++ /// for the given Fortran runtime library type. ++ virtual void AddFortranStdlibLibArgs(const llvm::opt::ArgList &Args, ++ llvm::opt::ArgStringList &CmdArgs) const; ++#endif ++ + /// Return sanitizers which are available in this toolchain. + virtual SanitizerMask getSupportedSanitizers() const; + +diff --git a/clang/include/clang/Driver/Types.def b/clang/include/clang/Driver/Types.def +index aaea3ec0f9c8..2f241b02676b 100644 +--- a/clang/include/clang/Driver/Types.def ++++ b/clang/include/clang/Driver/Types.def +@@ -77,8 +77,15 @@ TYPE("c++-module-cpp-output", PP_CXXModule, INVALID, "iim", phases + TYPE("ada", Ada, INVALID, nullptr, phases::Compile, phases::Backend, phases::Assemble, phases::Link) + TYPE("assembler", PP_Asm, INVALID, "s", phases::Assemble, phases::Link) + TYPE("assembler-with-cpp", Asm, PP_Asm, "S", phases::Preprocess, phases::Assemble, phases::Link) ++#ifdef ENABLE_CLASSIC_FLANG ++TYPE("f77", PP_F_FixedForm, INVALID, "f", phases::Compile, phases::Backend, phases::Assemble, phases::Link) ++TYPE("f77-cpp-input", F_FixedForm, PP_F_FixedForm, "F", phases::Preprocess, phases::Compile, phases::Backend, phases::Assemble, phases::Link) ++TYPE("f95", PP_F_FreeForm, INVALID, "f95", phases::Compile, phases::Backend, phases::Assemble, phases::Link) ++TYPE("f95-cpp-input", F_FreeForm, PP_F_FreeForm, "F95", phases::Preprocess, phases::Compile, phases::Backend, phases::Assemble, phases::Link) ++#else + TYPE("f95", PP_Fortran, INVALID, "i", phases::Compile, phases::Backend, phases::Assemble, phases::Link) + TYPE("f95-cpp-input", Fortran, PP_Fortran, nullptr, phases::Preprocess, phases::Compile, phases::Backend, phases::Assemble, phases::Link) ++#endif + TYPE("java", Java, INVALID, nullptr, phases::Compile, phases::Backend, phases::Assemble, phases::Link) + + // LLVM IR/LTO types. 
We define separate types for IR and LTO because LTO +diff --git a/clang/include/clang/Driver/Types.h b/clang/include/clang/Driver/Types.h +index 4a21af3534de..3a8b785a4c39 100644 +--- a/clang/include/clang/Driver/Types.h ++++ b/clang/include/clang/Driver/Types.h +@@ -95,6 +95,14 @@ namespace types { + /// isOpenCL - Is this an "OpenCL" input. + bool isOpenCL(ID Id); + ++#ifdef ENABLE_CLASSIC_FLANG ++ /// isFreeFormFortran -- is it a free form layout Fortran input ++ bool isFreeFormFortran(ID Id); ++ ++ /// isFixedFormFortran -- is it a fixed form layout Fortran input ++ bool isFixedFormFortran(ID Id); ++#endif ++ + /// isHLSL - Is this an HLSL input. + bool isHLSL(ID Id); + +diff --git a/clang/include/clang/Frontend/Utils.h b/clang/include/clang/Frontend/Utils.h +index 143cf4359f00..c51c617c1379 100644 +--- a/clang/include/clang/Frontend/Utils.h ++++ b/clang/include/clang/Frontend/Utils.h +@@ -17,8 +17,10 @@ + #include "clang/Basic/LLVM.h" + #include "clang/Driver/OptionUtils.h" + #include "clang/Frontend/DependencyOutputOptions.h" ++#include "clang/Basic/TargetInfo.h" + #include "llvm/ADT/ArrayRef.h" + #include "llvm/ADT/IntrusiveRefCntPtr.h" ++#include "llvm/ADT/StringExtras.h" + #include "llvm/ADT/StringMap.h" + #include "llvm/ADT/StringRef.h" + #include "llvm/ADT/StringSet.h" +@@ -31,6 +33,12 @@ + #include <utility> + #include <vector> + ++namespace llvm { ++ ++class StringRef; ++ ++} // namespace llvm ++ + namespace clang { + + class ASTReader; +@@ -39,6 +47,7 @@ class CompilerInvocation; + class DiagnosticsEngine; + class ExternalSemaSource; + class FrontendOptions; ++class MacroBuilder; + class PCHContainerReader; + class Preprocessor; + class PreprocessorOptions; +@@ -54,6 +63,29 @@ void InitializePreprocessor(Preprocessor &PP, const PreprocessorOptions &PPOpts, + void DoPrintPreprocessedInput(Preprocessor &PP, raw_ostream *OS, + const PreprocessorOutputOptions &Opts); + ++/// DefineTypeSize - Emit a macro to the predefines buffer that declares a macro ++/// named MacroName with the max value for a type with width 'TypeWidth' a ++/// signedness of 'isSigned' and with a value suffix of 'ValSuffix' (e.g. LL). ++template<typename T> ++static void DefineTypeSize(const Twine &MacroName, unsigned TypeWidth, ++ StringRef ValSuffix, bool isSigned, ++ T &Builder) { ++ static_assert(std::is_base_of<MacroBuilder, T>::value, "Illegal T value"); ++ llvm::APInt MaxVal = isSigned ? llvm::APInt::getSignedMaxValue(TypeWidth) ++ : llvm::APInt::getMaxValue(TypeWidth); ++ Builder.defineMacro(MacroName, toString(MaxVal, 10, isSigned) + ValSuffix); ++} ++ ++/// DefineTypeSize - An overloaded helper that uses TargetInfo to determine ++/// the width, suffix, and signedness of the given type ++template<typename T> ++static void DefineTypeSize(const Twine &MacroName, TargetInfo::IntType Ty, ++ const TargetInfo &TI, T &Builder) { ++ static_assert(std::is_base_of<MacroBuilder, T>::value, "Illegal T value"); ++ DefineTypeSize(MacroName, TI.getTypeWidth(Ty), TI.getTypeConstantSuffix(Ty), ++ TI.isTypeSigned(Ty), Builder); ++} ++ + /// An interface for collecting the dependencies of a compilation. Users should + /// use \c attachToPreprocessor and \c attachToASTReader to get all of the + /// dependencies. 
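The templated DefineTypeSize helpers added to Frontend/Utils.h above appear to exist so that builders other than the preprocessor's, such as the ClassicFlangMacroBuilder defined later in ClassicFlang.cpp (whose defineMacro override pushes -def NAME=VALUE onto the frontend command line instead of writing into a predefines buffer), can emit the same integer-limit macros. Below is a small self-contained sketch of the computation they perform, using plain C++ in place of llvm::APInt and clang's TargetInfo; SimpleMacroBuilder is a hypothetical stand-in, not a clang type.

// Standalone illustration (not part of the patch) of what DefineTypeSize
// computes: the maximum value of an integer type of a given width and
// signedness, emitted as a "#define NAME VALUE<suffix>" line.
#include <cstdint>
#include <iostream>
#include <string>

struct SimpleMacroBuilder {
  std::ostream &Out;
  void defineMacro(const std::string &Name, const std::string &Value) {
    Out << "#define " << Name << ' ' << Value << '\n';
  }
};

static void DefineTypeSize(const std::string &MacroName, unsigned TypeWidth,
                           const std::string &ValSuffix, bool IsSigned,
                           SimpleMacroBuilder &Builder) {
  // Max value: 2^(w-1)-1 for signed types, 2^w-1 for unsigned types
  // (the patch computes this with llvm::APInt so arbitrary widths work).
  uint64_t MaxVal =
      IsSigned ? (uint64_t(1) << (TypeWidth - 1)) - 1
               : (TypeWidth >= 64 ? ~uint64_t(0)
                                  : (uint64_t(1) << TypeWidth) - 1);
  Builder.defineMacro(MacroName, std::to_string(MaxVal) + ValSuffix);
}

int main() {
  SimpleMacroBuilder B{std::cout};
  DefineTypeSize("__INT_MAX__", 32, "", /*IsSigned=*/true, B);    // 2147483647
  DefineTypeSize("__LONG_MAX__", 64, "L", /*IsSigned=*/true, B);  // 9223372036854775807L
  return 0;
}

Running it prints "#define __INT_MAX__ 2147483647" and "#define __LONG_MAX__ 9223372036854775807L", the signed maxima 2^(w-1)-1 for widths 32 and 64, which is the same result the APInt-based helpers produce when driven by TargetInfo.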
+diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp +index 483f3e787a78..f962d60870d1 100644 +--- a/clang/lib/CodeGen/BackendUtil.cpp ++++ b/clang/lib/CodeGen/BackendUtil.cpp +@@ -272,6 +272,12 @@ static TargetLibraryInfoImpl *createTLII(llvm::Triple &TargetTriple, + TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::MASSV, + TargetTriple); + break; ++#ifdef ENABLE_CLASSIC_FLANG ++ case CodeGenOptions::PGMATH: ++ TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::PGMATH, ++ TargetTriple); ++ break; ++#endif + case CodeGenOptions::SVML: + TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::SVML, + TargetTriple); +diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp +index d8eb2aecb87a..3b140860cce4 100644 +--- a/clang/lib/CodeGen/CGDebugInfo.cpp ++++ b/clang/lib/CodeGen/CGDebugInfo.cpp +@@ -3980,7 +3980,8 @@ CGDebugInfo::getGlobalVariableForwardDeclaration(const VarDecl *VD) { + auto Align = getDeclAlignIfRequired(VD, CGM.getContext()); + auto *GV = DBuilder.createTempGlobalVariableFwdDecl( + DContext, Name, LinkageName, Unit, Line, getOrCreateType(T, Unit), +- !VD->isExternallyVisible(), nullptr, TemplateParameters, Align); ++ !VD->isExternallyVisible(), nullptr, TemplateParameters, ++ llvm::DINode::FlagZero, Align); + FwdDeclReplaceMap.emplace_back( + std::piecewise_construct, + std::make_tuple(cast<VarDecl>(VD->getCanonicalDecl())), +@@ -5467,7 +5468,7 @@ void CGDebugInfo::EmitGlobalVariable(llvm::GlobalVariable *Var, + Var->hasLocalLinkage(), true, + Expr.empty() ? nullptr : DBuilder.createExpression(Expr), + getOrCreateStaticDataMemberDeclarationOrNull(D), TemplateParameters, +- Align, Annotations); ++ llvm::DINode::FlagZero, Align, Annotations); + Var->addDebugInfo(GVE); + } + DeclCacheD->getCanonicalDecl().reset(GVE); +@@ -5563,7 +5564,7 @@ void CGDebugInfo::EmitGlobalVariable(const ValueDecl *VD, const APValue &Init) { + GV.reset(DBuilder.createGlobalVariableExpression( + DContext, Name, StringRef(), Unit, getLineNumber(VD->getLocation()), Ty, + true, true, InitExpr, getOrCreateStaticDataMemberDeclarationOrNull(VarD), +- TemplateParameters, Align)); ++ TemplateParameters, llvm::DINode::FlagZero, Align)); + } + + void CGDebugInfo::EmitExternalVariable(llvm::GlobalVariable *Var, +@@ -5581,7 +5582,8 @@ void CGDebugInfo::EmitExternalVariable(llvm::GlobalVariable *Var, + llvm::DIGlobalVariableExpression *GVE = + DBuilder.createGlobalVariableExpression( + DContext, Name, StringRef(), Unit, getLineNumber(D->getLocation()), +- Ty, false, false, nullptr, nullptr, nullptr, Align); ++ Ty, false, false, nullptr, nullptr, nullptr, llvm::DINode::FlagZero, ++ Align); + Var->addDebugInfo(GVE); + } + +diff --git a/clang/lib/Driver/CMakeLists.txt b/clang/lib/Driver/CMakeLists.txt +index a6bd2d41e797..ac30007588b1 100644 +--- a/clang/lib/Driver/CMakeLists.txt ++++ b/clang/lib/Driver/CMakeLists.txt +@@ -14,6 +14,12 @@ if(WIN32) + set(system_libs version) + endif() + ++if(LLVM_ENABLE_CLASSIC_FLANG) ++ set(TOOLCHAINS_FLANG_CPP ToolChains/ClassicFlang.cpp) ++else() ++ set(TOOLCHAINS_FLANG_CPP ToolChains/Flang.cpp) ++endif() ++ + add_clang_library(clangDriver + Action.cpp + Compilation.cpp +@@ -56,7 +62,7 @@ add_clang_library(clangDriver + ToolChains/Cuda.cpp + ToolChains/Darwin.cpp + ToolChains/DragonFly.cpp +- ToolChains/Flang.cpp ++ ${TOOLCHAINS_FLANG_CPP} + ToolChains/FreeBSD.cpp + ToolChains/Fuchsia.cpp + ToolChains/Gnu.cpp +diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp +index 
a7e4edb3ab5a..8bfee5928fe5 100644 +--- a/clang/lib/Driver/Driver.cpp ++++ b/clang/lib/Driver/Driver.cpp +@@ -377,10 +377,14 @@ phases::ID Driver::getFinalPhase(const DerivedArgList &DAL, + (PhaseArg = DAL.getLastArg(options::OPT_fmodule_header, + options::OPT_fmodule_header_EQ))) { + FinalPhase = phases::Precompile; ++ + // -{fsyntax-only,-analyze,emit-ast} only run up to the compiler. + } else if ((PhaseArg = DAL.getLastArg(options::OPT_fsyntax_only)) || + (PhaseArg = DAL.getLastArg(options::OPT_print_supported_cpus)) || + (PhaseArg = DAL.getLastArg(options::OPT_module_file_info)) || ++#ifdef ENABLE_CLASSIC_FLANG ++ (PhaseArg = DAL.getLastArg(options::OPT_emit_flang_llvm)) || ++#endif + (PhaseArg = DAL.getLastArg(options::OPT_verify_pch)) || + (PhaseArg = DAL.getLastArg(options::OPT_rewrite_objc)) || + (PhaseArg = DAL.getLastArg(options::OPT_rewrite_legacy_objc)) || +@@ -1983,7 +1987,11 @@ void Driver::PrintHelp(bool ShowHidden) const { + + void Driver::PrintVersion(const Compilation &C, raw_ostream &OS) const { + if (IsFlangMode()) { ++#ifdef ENABLE_CLASSIC_FLANG ++ OS << getClangToolFullVersion("flang") << '\n'; ++#else + OS << getClangToolFullVersion("flang-new") << '\n'; ++#endif + } else { + // FIXME: The following handlers should use a callback mechanism, we don't + // know what the client would like to do. +@@ -2665,7 +2673,15 @@ void Driver::BuildInputs(const ToolChain &TC, DerivedArgList &Args, + // stdin must be handled specially. + if (memcmp(Value, "-", 2) == 0) { + if (IsFlangMode()) { ++#ifdef ENABLE_CLASSIC_FLANG ++ // If running with -E, treat as needing preprocessing ++ if (!Args.hasArgNoClaim(options::OPT_E)) ++ Ty = types::TY_PP_F_FreeForm; ++ else ++ Ty = types::TY_F_FreeForm; ++#else + Ty = types::TY_Fortran; ++#endif + } else { + // If running with -E, treat as a C input (this changes the + // builtin macros, for example). This may be overridden by -ObjC +@@ -2687,6 +2703,16 @@ void Driver::BuildInputs(const ToolChain &TC, DerivedArgList &Args, + // idea of what .s is. + if (const char *Ext = strrchr(Value, '.')) + Ty = TC.LookupTypeForExtension(Ext + 1); ++#ifdef ENABLE_CLASSIC_FLANG ++ // If called with -E, treat the inputs as needing preprocessing ++ // regardless of extension ++ if (IsFlangMode() && Args.hasArgNoClaim(options::OPT_E)) { ++ if (Ty == types::TY_PP_F_FreeForm) ++ Ty = types::TY_F_FreeForm; ++ else if (Ty == types::TY_PP_F_FixedForm) ++ Ty = types::TY_F_FixedForm; ++ } ++#endif + + if (Ty == types::TY_INVALID) { + if (IsCLMode() && (Args.hasArgNoClaim(options::OPT_E) || CCGenDiagnostics)) +@@ -4011,6 +4037,14 @@ void Driver::handleArguments(Compilation &C, DerivedArgList &Args, + if (InputArg->isClaimed()) + continue; + ++#ifdef ENABLE_CLASSIC_FLANG ++ // If the input is detected as already preprocessed (e.g. has the .f95 ++ // extension), and the user specifies -E, preprocess the file anyway. ++ if (IsFlangMode() && InitialPhase == phases::Compile && ++ FinalPhase == phases::Preprocess) ++ continue; ++#endif ++ + // Claim here to avoid the more general unused warning. 
+ InputArg->claim(); + +@@ -4745,6 +4779,10 @@ Action *Driver::ConstructPhaseAction( + return C.MakeAction<VerifyPCHJobAction>(Input, types::TY_Nothing); + if (Args.hasArg(options::OPT_extract_api)) + return C.MakeAction<ExtractAPIJobAction>(Input, types::TY_API_INFO); ++#ifdef ENABLE_CLASSIC_FLANG ++ if (IsFlangMode()) ++ return C.MakeAction<CompileJobAction>(Input, types::TY_LLVM_IR); ++#endif + return C.MakeAction<CompileJobAction>(Input, types::TY_LLVM_BC); + } + case phases::Backend: { +@@ -5199,6 +5237,12 @@ class ToolSelector final { + if (!T->hasIntegratedBackend() && !(OutputIsLLVM && T->canEmitIR())) + return nullptr; + ++#ifdef ENABLE_CLASSIC_FLANG ++ // Classic Flang is not integrated with the backend. ++ if (C.getDriver().IsFlangMode() && !T->hasIntegratedAssembler()) ++ return nullptr; ++#endif ++ + if (T->canEmitIR() && ((SaveTemps && !InputIsBitcode) || EmbedBitcode)) + return nullptr; + +@@ -5214,8 +5258,17 @@ class ToolSelector final { + /// are appended to \a CollapsedOffloadAction. + void combineWithPreprocessor(const Tool *T, ActionList &Inputs, + ActionList &CollapsedOffloadAction) { ++#ifdef ENABLE_CLASSIC_FLANG ++ // flang1 always combines preprocessing and compilation. ++ // Do not return early even when -save-temps is used. ++ if (!T || !T->hasIntegratedCPP() || ++ (strcmp(T->getName(), "classic-flang") && ++ !canCollapsePreprocessorAction())) ++ return; ++#else + if (!T || !canCollapsePreprocessorAction() || !T->hasIntegratedCPP()) + return; ++#endif + + // Attempt to get a preprocessor action dependence. + ActionList PreprocessJobOffloadActions; +@@ -6418,8 +6471,11 @@ bool Driver::ShouldUseFlangCompiler(const JobAction &JA) const { + return false; + + // And say "no" if this is not a kind of action flang understands. +- if (!isa<PreprocessJobAction>(JA) && !isa<CompileJobAction>(JA) && +- !isa<BackendJobAction>(JA)) ++ if (!isa<PreprocessJobAction>(JA) && !isa<CompileJobAction>(JA) ++#ifndef ENABLE_CLASSIC_FLANG ++ && !isa<BackendJobAction>(JA) ++#endif ++ ) + return false; + + return true; +diff --git a/clang/lib/Driver/OffloadBundler.cpp b/clang/lib/Driver/OffloadBundler.cpp +index 0ddfb07fdad5..acc4e9128cb1 100644 +--- a/clang/lib/Driver/OffloadBundler.cpp ++++ b/clang/lib/Driver/OffloadBundler.cpp +@@ -864,6 +864,10 @@ CreateFileHandler(MemoryBuffer &FirstInput, + return std::make_unique<TextFileHandler>(/*Comment=*/"#"); + if (FilesType == "ll") + return std::make_unique<TextFileHandler>(/*Comment=*/";"); ++#ifdef ENABLE_CLASSIC_FLANG ++ if (FilesType == "f95") ++ return std::make_unique<TextFileHandler>(/*Comment=*/"!"); ++#endif + if (FilesType == "bc") + return std::make_unique<BinaryFileHandler>(BundlerConfig); + if (FilesType == "s") +diff --git a/clang/lib/Driver/ToolChain.cpp b/clang/lib/Driver/ToolChain.cpp +index 0146d8af3549..3d92b7c2a3bd 100644 +--- a/clang/lib/Driver/ToolChain.cpp ++++ b/clang/lib/Driver/ToolChain.cpp +@@ -10,6 +10,9 @@ + #include "ToolChains/Arch/AArch64.h" + #include "ToolChains/Arch/ARM.h" + #include "ToolChains/Clang.h" ++#ifdef ENABLE_CLASSIC_FLANG ++#include "ToolChains/ClassicFlang.h" ++#endif + #include "ToolChains/CommonArgs.h" + #include "ToolChains/Flang.h" + #include "ToolChains/InterfaceStubs.h" +@@ -441,7 +444,11 @@ Tool *ToolChain::getClang() const { + + Tool *ToolChain::getFlang() const { + if (!Flang) ++#ifdef ENABLE_CLASSIC_FLANG ++ Flang.reset(new tools::ClassicFlang(*this)); ++#else + Flang.reset(new tools::Flang(*this)); ++#endif + return Flang.get(); + } + +@@ -864,13 +871,13 @@ std::string 
ToolChain::GetStaticLibToolPath() const { + + types::ID ToolChain::LookupTypeForExtension(StringRef Ext) const { + types::ID id = types::lookupTypeForExtension(Ext); +- ++#ifndef ENABLE_CLASSIC_FLANG + // Flang always runs the preprocessor and has no notion of "preprocessed + // fortran". Here, TY_PP_Fortran is coerced to TY_Fortran to avoid treating + // them differently. + if (D.IsFlangMode() && id == types::TY_PP_Fortran) + id = types::TY_Fortran; +- ++#endif + return id; + } + +@@ -1223,6 +1230,31 @@ void ToolChain::AddCCKextLibArgs(const ArgList &Args, + CmdArgs.push_back("-lcc_kext"); + } + ++#ifdef ENABLE_CLASSIC_FLANG ++void ToolChain::AddFortranStdlibLibArgs(const ArgList &Args, ++ ArgStringList &CmdArgs) const { ++ bool StaticFlangLibs = false; ++ if (Args.hasArg(options::OPT_staticFlangLibs)) { ++ StaticFlangLibs = true; ++ Args.ClaimAllArgs(options::OPT_staticFlangLibs); ++ } ++ ++ if (StaticFlangLibs && !Args.hasArg(options::OPT_static)) ++ CmdArgs.push_back("-Bstatic"); ++ CmdArgs.push_back("-lflang"); ++ CmdArgs.push_back("-lflangrti"); ++ CmdArgs.push_back("-lpgmath"); ++ if (StaticFlangLibs && !Args.hasArg(options::OPT_static)) ++ CmdArgs.push_back("-Bdynamic"); ++ ++ // Always link Fortran executables with pthreads. ++ CmdArgs.push_back("-lpthread"); ++ ++ if (!Triple.isOSDarwin()) ++ CmdArgs.push_back("-lrt"); ++} ++#endif ++ + bool ToolChain::isFastMathRuntimeAvailable(const ArgList &Args, + std::string &Path) const { + // Do not check for -fno-fast-math or -fno-unsafe-math when -Ofast passed +diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp +index 793af55a1e5f..c5ba8a690687 100644 +--- a/clang/lib/Driver/ToolChains/Clang.cpp ++++ b/clang/lib/Driver/ToolChains/Clang.cpp +@@ -5367,6 +5367,66 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, + } + A->render(Args, CmdArgs); + } ++#ifdef ENABLE_CLASSIC_FLANG ++ // Use PGMATH for Classic Flang by default. ++ else { ++ CmdArgs.push_back("-fveclib=PGMATH"); ++ } ++ ++ std::string PassRemarkVal(""), PassRemarkOpt(""); ++ if (Args.getLastArg(options::OPT_Minfoall)) { ++ PassRemarkVal = ".*"; ++ Args.ClaimAllArgs(options::OPT_Minfoall); ++ } else if (Arg *A = Args.getLastArg(options::OPT_Minfo_EQ)) { ++ for (StringRef val : A->getValues()) { ++ if (val.equals("all")) { ++ PassRemarkVal = ".*"; ++ break; ++ } else if (val.equals("inline") || val.equals("vect")) { ++ PassRemarkVal += PassRemarkVal.empty() ? "" : "|"; ++ PassRemarkVal += val; ++ } else { ++ D.Diag(diag::err_drv_clang_unsupported_minfo_arg) ++ << A->getOption().getName() ++ << val.str(); ++ break; ++ } ++ } ++ } ++ PassRemarkOpt = "-pass-remarks=" + PassRemarkVal; ++ CmdArgs.push_back("-mllvm"); ++ CmdArgs.push_back(Args.MakeArgString(PassRemarkOpt)); ++ Args.ClaimAllArgs(options::OPT_Minfo_EQ); ++ PassRemarkVal.clear(); ++ PassRemarkOpt.clear(); ++ ++ if (Args.getLastArg(options::OPT_Mneginfoall)) { ++ PassRemarkVal = ".*"; ++ Args.ClaimAllArgs(options::OPT_Mneginfoall); ++ } else if (Arg *A = Args.getLastArg(options::OPT_Mneginfo_EQ)) { ++ for (StringRef val : A->getValues()) { ++ if (val.equals("all")) { ++ PassRemarkVal = ".*"; ++ break; ++ } else if (val.equals("inline") || val.equals("vect")) { ++ PassRemarkVal += PassRemarkVal.empty() ? 
"" : "|"; ++ PassRemarkVal += val; ++ } else { ++ D.Diag(diag::err_drv_clang_unsupported_minfo_arg) ++ << A->getOption().getName() ++ << val.str(); ++ break; ++ } ++ } ++ } ++ PassRemarkOpt = "-pass-remarks-missed=" + PassRemarkVal; ++ CmdArgs.push_back("-mllvm"); ++ CmdArgs.push_back(Args.MakeArgString(PassRemarkOpt)); ++ PassRemarkOpt = "-pass-remarks-analysis=" + PassRemarkVal; ++ CmdArgs.push_back("-mllvm"); ++ CmdArgs.push_back(Args.MakeArgString(PassRemarkOpt)); ++ Args.ClaimAllArgs(options::OPT_Mneginfo_EQ); ++#endif + + if (Args.hasFlag(options::OPT_fmerge_all_constants, + options::OPT_fno_merge_all_constants, false)) +diff --git a/clang/lib/Driver/ToolChains/ClassicFlang.cpp b/clang/lib/Driver/ToolChains/ClassicFlang.cpp +new file mode 100644 +index 000000000000..b8be0be20b07 +--- /dev/null ++++ b/clang/lib/Driver/ToolChains/ClassicFlang.cpp +@@ -0,0 +1,1217 @@ ++//===-- ClassicFlang.cpp - Flang+LLVM ToolChain Implementations -*- C++ -*-===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++ ++#include "ClassicFlang.h" ++#include "CommonArgs.h" ++#include "clang/Driver/InputInfo.h" ++#include "clang/Basic/CharInfo.h" ++#include "clang/Basic/LangOptions.h" ++#include "clang/Basic/MacroBuilder.h" ++#include "clang/Basic/ObjCRuntime.h" ++#include "clang/Basic/TargetInfo.h" ++#include "clang/Basic/TargetOptions.h" ++#include "clang/Basic/Version.h" ++#include "clang/Config/config.h" ++#include "clang/Driver/DriverDiagnostic.h" ++#include "clang/Driver/Options.h" ++#include "clang/Driver/SanitizerArgs.h" ++#include "clang/Driver/XRayArgs.h" ++#include "clang/Frontend/CompilerInstance.h" ++#include "clang/Frontend/Utils.h" ++#include "llvm/ADT/StringExtras.h" ++#include "llvm/Option/ArgList.h" ++#include "llvm/Support/CodeGen.h" ++#include "llvm/Support/Compression.h" ++#include "llvm/Support/FileSystem.h" ++#include "llvm/Support/Path.h" ++#include "llvm/Support/Process.h" ++#include "llvm/Support/YAMLParser.h" ++#include "llvm/TargetParser/TargetParser.h" ++ ++#ifdef LLVM_ON_UNIX ++#include <unistd.h> // For getuid(). 
++#endif ++ ++using namespace clang::driver; ++using namespace clang::driver::tools; ++using namespace clang; ++using namespace llvm::opt; ++ ++class ClassicFlangMacroBuilder : public MacroBuilder { ++ ArgStringList &CmdArgs; ++ const ArgList &DriverArgs; ++ public: ++ ClassicFlangMacroBuilder(ArgStringList &UpperCmdArgs, const ArgList &DriverArgs, llvm::raw_string_ostream &Output) ++ : MacroBuilder(Output), CmdArgs(UpperCmdArgs), DriverArgs(DriverArgs) { ++ } ++ virtual void defineMacro(const Twine &Name, const Twine &Value = "1") override { ++ CmdArgs.push_back("-def"); ++ CmdArgs.push_back(DriverArgs.MakeArgString(Name + Twine('=') + Value)); ++ } ++}; ++ ++void ClassicFlang::ConstructJob(Compilation &C, const JobAction &JA, ++ const InputInfo &Output, const InputInfoList &Inputs, ++ const ArgList &Args, const char *LinkingOutput) const { ++ const Driver &D = getToolChain().getDriver(); ++ const llvm::Triple &Triple = getToolChain().getEffectiveTriple(); ++ ArgStringList CommonCmdArgs; ++ ArgStringList UpperCmdArgs; ++ ArgStringList LowerCmdArgs; ++ SmallString<256> Stem; ++ std::string OutFile; ++ bool NeedIEEE = true; ++ bool NeedFastMath = false; ++ bool NeedRelaxedMath = false; ++ bool AssociativeMath = false; ++ bool SignedZeros = true; ++ ++ // Check number of inputs for sanity. We need at least one input. ++ assert(Inputs.size() >= 1 && "Must have at least one input."); ++ ++ /***** Process file arguments to both parts *****/ ++ const InputInfo &Input = Inputs0; ++ types::ID InputType = Input.getType(); ++ // Check file type sanity ++ assert(types::isAcceptedByFlang(InputType) && "Can only accept Fortran"); ++ ++ if (Args.hasArg(options::OPT_fsyntax_only, options::OPT_E)) { ++ // For -fsyntax-only and -E produce temp files only ++ Stem = C.getDriver().GetTemporaryPath("", ""); ++ } else { ++ OutFile = Output.getFilename(); ++ Stem = llvm::sys::path::filename(OutFile); ++ llvm::sys::path::replace_extension(Stem, ""); ++ } ++ ++#ifdef ENABLE_CLASSIC_FLANG ++ if (Args.hasArg(options::OPT_emit_flang_llvm)) { ++ // -emit-flang-llvm only supports asm output so claim -S to prevent warning ++ Args.ClaimAllArgs(options::OPT_S); ++ } ++#endif ++ ++ // Add input file name to the compilation line ++ UpperCmdArgs.push_back(Input.getBaseInput()); ++ ++ // Add temporary output for ILM ++ const char * ILMFile = Args.MakeArgString(Stem + ".ilm"); ++ LowerCmdArgs.push_back(ILMFile); ++ C.addTempFile(ILMFile); ++ ++ // Generate -cmdline ++ std::string CmdLine("'+flang"); ++ // ignore the first argument which reads "--driver-mode=fortran" ++ for (unsigned i = 1; i < Args.getNumInputArgStrings(); ++i) { ++ CmdLine.append(" "); ++ CmdLine.append(Args.getArgString(i)); ++ } ++ CmdLine.append("'"); ++ ++ CommonCmdArgs.push_back("-cmdline"); ++ CommonCmdArgs.push_back(Args.MakeArgString(CmdLine)); ++ ++ /***** Process common args *****/ ++ ++ // Add "inform level" flag ++ if (Args.hasArg(options::OPT_Minform_EQ)) { ++ // Parse arguments to set its value ++ for (Arg *A : Args.filtered(options::OPT_Minform_EQ)) { ++ A->claim(); ++ CommonCmdArgs.push_back("-inform"); ++ CommonCmdArgs.push_back(A->getValue(0)); ++ } ++ } else { ++ // Default value ++ CommonCmdArgs.push_back("-inform"); ++ CommonCmdArgs.push_back("warn"); ++ } ++ ++ for (auto Arg : Args.filtered(options::OPT_Msave_on)) { ++ Arg->claim(); ++ CommonCmdArgs.push_back("-save"); ++ } ++ ++ for (auto Arg : Args.filtered(options::OPT_Msave_off)) { ++ Arg->claim(); ++ CommonCmdArgs.push_back("-nosave"); ++ } ++ ++ // Treat denormalized numbers as 
zero: On ++ for (auto Arg : Args.filtered(options::OPT_Mdaz_on)) { ++ Arg->claim(); ++ CommonCmdArgs.push_back("-x"); ++ CommonCmdArgs.push_back("129"); ++ CommonCmdArgs.push_back("4"); ++ CommonCmdArgs.push_back("-y"); ++ CommonCmdArgs.push_back("129"); ++ CommonCmdArgs.push_back("0x400"); ++ } ++ ++ // Treat denormalized numbers as zero: Off ++ for (auto Arg : Args.filtered(options::OPT_Mdaz_off)) { ++ Arg->claim(); ++ CommonCmdArgs.push_back("-y"); ++ CommonCmdArgs.push_back("129"); ++ CommonCmdArgs.push_back("4"); ++ CommonCmdArgs.push_back("-x"); ++ CommonCmdArgs.push_back("129"); ++ CommonCmdArgs.push_back("0x400"); ++ } ++ ++ // Store constants in writable data segment ++ for (auto Arg : Args.filtered(options::OPT_Mwritable_constants)) { ++ Arg->claim(); ++ LowerCmdArgs.push_back("-x"); ++ LowerCmdArgs.push_back("183"); ++ LowerCmdArgs.push_back("0x20000000"); ++ } ++ ++ // Bounds checking: On ++ for (auto Arg : Args.filtered(options::OPT_Mbounds_on)) { ++ Arg->claim(); ++ CommonCmdArgs.push_back("-x"); ++ CommonCmdArgs.push_back("70"); ++ CommonCmdArgs.push_back("2"); ++ } ++ ++ // Bounds checking: Off ++ for (auto Arg : Args.filtered(options::OPT_Mbounds_off)) { ++ Arg->claim(); ++ CommonCmdArgs.push_back("-y"); ++ CommonCmdArgs.push_back("70"); ++ CommonCmdArgs.push_back("2"); ++ } ++ ++ // Generate code allowing recursive subprograms ++ for (auto Arg : Args.filtered(options::OPT_Mrecursive_on)) { ++ Arg->claim(); ++ CommonCmdArgs.push_back("-recursive"); ++ } ++ ++ // Disable recursive subprograms ++ for (auto Arg : Args.filtered(options::OPT_Mrecursive_off)) { ++ Arg->claim(); ++ CommonCmdArgs.push_back("-norecursive"); ++ } ++ ++ // Enable generating reentrant code (disable optimizations that inhibit it) ++ for (auto Arg : Args.filtered(options::OPT_Mreentrant_on)) { ++ Arg->claim(); ++ CommonCmdArgs.push_back("-reentrant"); ++ } ++ ++ // Allow optimizations inhibiting reentrancy ++ for (auto Arg : Args.filtered(options::OPT_Mreentrant_off)) { ++ Arg->claim(); ++ CommonCmdArgs.push_back("-noreentrant"); ++ } ++ ++ // Swap byte order for unformatted IO ++ for (auto Arg : Args.filtered(options::OPT_Mbyteswapio, options::OPT_byteswapio)) { ++ Arg->claim(); ++ CommonCmdArgs.push_back("-x"); ++ CommonCmdArgs.push_back("125"); ++ CommonCmdArgs.push_back("2"); ++ } ++ ++ // Contiguous pointer checks ++ if (Arg *A = Args.getLastArg(options::OPT_fsanitize_EQ)) { ++ for (StringRef val : A->getValues()) { ++ if (val.equals("discontiguous") || val.equals("undefined") ) { ++ // -x 54 0x40 -x 54 0x80 -x 54 0x200 ++ UpperCmdArgs.push_back("-x"); ++ UpperCmdArgs.push_back("54"); ++ UpperCmdArgs.push_back("0x2c0"); ++ ++ // -fsanitze=discontiguous has no meaning in LLVM, only flang driver needs to ++ // recognize it. However -fsanitize=undefined needs to be passed on for further ++ // processing by the non-flang part of the driver. 
++ if (val.equals("discontiguous")) ++ A->claim(); ++ break; ++ } ++ } ++ } ++ ++ // Treat backslashes as regular characters ++ for (auto Arg : Args.filtered(options::OPT_fno_backslash, options::OPT_Mbackslash)) { ++ Arg->claim(); ++ CommonCmdArgs.push_back("-x"); ++ CommonCmdArgs.push_back("124"); ++ CommonCmdArgs.push_back("0x40"); ++ } ++ ++ // Treat backslashes as C-style escape characters ++ for (auto Arg : Args.filtered(options::OPT_fbackslash, options::OPT_Mnobackslash)) { ++ Arg->claim(); ++ CommonCmdArgs.push_back("-y"); ++ CommonCmdArgs.push_back("124"); ++ CommonCmdArgs.push_back("0x40"); ++ } ++ ++ // handle OpemMP options ++ if (auto *A = Args.getLastArg(options::OPT_mp, options::OPT_nomp, ++ options::OPT_fopenmp, options::OPT_fno_openmp)) { ++ for (auto Arg : Args.filtered(options::OPT_mp, options::OPT_nomp)) { ++ Arg->claim(); ++ } ++ for (auto Arg : Args.filtered(options::OPT_fopenmp, ++ options::OPT_fno_openmp)) { ++ Arg->claim(); ++ } ++ ++ if (A->getOption().matches(options::OPT_mp) || ++ A->getOption().matches(options::OPT_fopenmp)) { ++ ++ CommonCmdArgs.push_back("-mp"); ++ ++ // Allocate threadprivate data local to the thread ++ CommonCmdArgs.push_back("-x"); ++ CommonCmdArgs.push_back("69"); ++ CommonCmdArgs.push_back("0x200"); ++ ++ // Use the 'fair' schedule as the default static schedule ++ // for parallel do loops ++ CommonCmdArgs.push_back("-x"); ++ CommonCmdArgs.push_back("69"); ++ CommonCmdArgs.push_back("0x400"); ++ ++ // Disable use of native atomic instructions ++ // for OpenMP atomics pending either a named ++ // option or a libatomic bundled with flang. ++ UpperCmdArgs.push_back("-x"); ++ UpperCmdArgs.push_back("69"); ++ UpperCmdArgs.push_back("0x1000"); ++ } ++ } ++ ++ // Align large objects on cache lines ++ for (auto Arg : Args.filtered(options::OPT_Mcache_align_on)) { ++ Arg->claim(); ++ CommonCmdArgs.push_back("-x"); ++ CommonCmdArgs.push_back("119"); ++ CommonCmdArgs.push_back("0x10000000"); ++ LowerCmdArgs.push_back("-x"); ++ LowerCmdArgs.push_back("129"); ++ LowerCmdArgs.push_back("0x40000000"); ++ } ++ ++ // Disable special alignment of large objects ++ for (auto Arg : Args.filtered(options::OPT_Mcache_align_off)) { ++ Arg->claim(); ++ CommonCmdArgs.push_back("-y"); ++ CommonCmdArgs.push_back("119"); ++ CommonCmdArgs.push_back("0x10000000"); ++ LowerCmdArgs.push_back("-y"); ++ LowerCmdArgs.push_back("129"); ++ LowerCmdArgs.push_back("0x40000000"); ++ } ++ ++ // -Mstack_arrays ++ for (auto Arg : Args.filtered(options::OPT_Mstackarrays)) { ++ Arg->claim(); ++ CommonCmdArgs.push_back("-x"); ++ CommonCmdArgs.push_back("54"); ++ CommonCmdArgs.push_back("8"); ++ } ++ ++ // -Memit-dwarf-common-blocks-name, only add xbit to flang2. ++ for (auto Arg : Args.filtered(options::OPT_Memit_dwarf_common_blocks_name)) { ++ Arg->claim(); ++ LowerCmdArgs.push_back("-x"); ++ LowerCmdArgs.push_back("183"); ++ LowerCmdArgs.push_back("0x40000000"); ++ } ++ ++ // -Munixlogical, only add xbit to flang2. ++ for (auto Arg : Args.filtered(options::OPT_Munixlogical)) { ++ Arg->claim(); ++ LowerCmdArgs.push_back("-x"); ++ LowerCmdArgs.push_back("125"); ++ LowerCmdArgs.push_back("0x8"); ++ } ++ ++ // Last argument of -g/-gdwarfX should be taken. 
++ Arg *GArg = Args.getLastArg(options::OPT_g_Flag); ++ Arg *GDwarfArg = Args.getLastArg(options::OPT_gdwarf_2, ++ options::OPT_gdwarf_3, ++ options::OPT_gdwarf_4, ++ options::OPT_gdwarf_5); ++ ++ if (GArg || GDwarfArg) { ++ ++ for (auto Arg : Args.filtered(options::OPT_g_Flag, options::OPT_gdwarf_2, ++ options::OPT_gdwarf_3, options::OPT_gdwarf_4, ++ options::OPT_gdwarf_5)) { ++ Arg->claim(); ++ } ++ ++ CommonCmdArgs.push_back("-x"); ++ CommonCmdArgs.push_back("120"); ++ ++ if (!GDwarfArg) // -g without -gdwarf-X produces default (DWARFv4) ++ CommonCmdArgs.push_back("0x1000000"); ++ else if (GDwarfArg->getOption().matches(options::OPT_gdwarf_2)) // -gdwarf-2 ++ CommonCmdArgs.push_back("0x200"); ++ else if (GDwarfArg->getOption().matches(options::OPT_gdwarf_3)) // -gdwarf-3 ++ CommonCmdArgs.push_back("0x4000"); ++ else if (GDwarfArg->getOption().matches(options::OPT_gdwarf_4)) // -gdwarf-4 ++ CommonCmdArgs.push_back("0x1000000"); ++ else if (GDwarfArg->getOption().matches(options::OPT_gdwarf_5)) // -gdwarf-5 ++ CommonCmdArgs.push_back("0x2000000"); ++ // Handle `-gpubnames` option separately. ++ for (auto Arg : Args.filtered(options::OPT_gpubnames)) { ++ Arg->claim(); ++ CommonCmdArgs.push_back("-x"); ++ CommonCmdArgs.push_back("120"); ++ CommonCmdArgs.push_back("0x40000000"); // -gpubnames ++ } ++ } ++ ++ // -Mipa has no effect ++ if (Arg *A = Args.getLastArg(options::OPT_Mipa)) { ++ D.Diag(diag::warn_drv_clang_unsupported) ++ << A->getAsString(Args); ++ } ++ ++ // -Minline has no effect ++ if (Arg *A = Args.getLastArg(options::OPT_Minline_on)) { ++ D.Diag(diag::warn_drv_clang_unsupported) ++ << A->getAsString(Args); ++ } ++ ++ // Handle -fdefault-real-8 (and its alias, -r8) and -fno-default-real-8 ++ if (Arg *A = Args.getLastArg(options::OPT_r8, ++ options::OPT_fdefault_real_8, ++ options::OPT_fno_default_real_8)) { ++ const char * fl; ++ // For -f version add -x flag, for -fno add -y ++ if (A->getOption().matches(options::OPT_fno_default_real_8)) { ++ fl = "-y"; ++ } else { ++ fl = "-x"; ++ } ++ ++ for (Arg *A : Args.filtered(options::OPT_r8, ++ options::OPT_fdefault_real_8, ++ options::OPT_fno_default_real_8)) { ++ A->claim(); ++ } ++ ++ UpperCmdArgs.push_back(fl); ++ UpperCmdArgs.push_back("124"); ++ UpperCmdArgs.push_back("0x8"); ++ UpperCmdArgs.push_back(fl); ++ UpperCmdArgs.push_back("124"); ++ UpperCmdArgs.push_back("0x80000"); ++ } ++ ++ // Process and claim -i8/-fdefault-integer-8/-fno-default-integer-8 argument ++ if (Arg *A = Args.getLastArg(options::OPT_i8, ++ options::OPT_fdefault_integer_8, ++ options::OPT_fno_default_integer_8)) { ++ const char * fl; ++ ++ if (A->getOption().matches(options::OPT_fno_default_integer_8)) { ++ fl = "-y"; ++ } else { ++ fl = "-x"; ++ } ++ ++ for (Arg *A : Args.filtered(options::OPT_i8, ++ options::OPT_fdefault_integer_8, ++ options::OPT_fno_default_integer_8)) { ++ A->claim(); ++ } ++ ++ UpperCmdArgs.push_back(fl); ++ UpperCmdArgs.push_back("124"); ++ UpperCmdArgs.push_back("0x10"); ++ } ++ ++ // Pass an arbitrary flag for first part of Fortran frontend ++ for (Arg *A : Args.filtered(options::OPT_Wh_EQ)) { ++ A->claim(); ++ StringRef Value = A->getValue(); ++ SmallVector<StringRef, 8> PassArgs; ++ Value.split(PassArgs, StringRef(",")); ++ for (StringRef PassArg : PassArgs) { ++ UpperCmdArgs.push_back(Args.MakeArgString(PassArg)); ++ } ++ } ++ ++ // Flush to zero mode ++ // Disabled by default, but can be enabled by a switch ++ if (Args.hasArg(options::OPT_Mflushz_on)) { ++ // For -Mflushz set -x 129 2 for second part of Fortran frontend ++ for 
(Arg *A: Args.filtered(options::OPT_Mflushz_on)) { ++ A->claim(); ++ LowerCmdArgs.push_back("-x"); ++ LowerCmdArgs.push_back("129"); ++ LowerCmdArgs.push_back("2"); ++ } ++ } else { ++ LowerCmdArgs.push_back("-y"); ++ LowerCmdArgs.push_back("129"); ++ LowerCmdArgs.push_back("2"); ++ for (Arg *A: Args.filtered(options::OPT_Mflushz_off)) { ++ A->claim(); ++ } ++ } ++ ++ // For -fPIC set -x 62 8 for second part of Fortran frontend ++ for (Arg *A: Args.filtered(options::OPT_fPIC)) { ++ A->claim(); ++ LowerCmdArgs.push_back("-x"); ++ LowerCmdArgs.push_back("62"); ++ LowerCmdArgs.push_back("8"); ++ } ++ ++ StringRef OptOStr("0"); ++ if (Arg *A = Args.getLastArg(options::OPT_O_Group)) { ++ if (A->getOption().matches(options::OPT_O4)) { ++ OptOStr = "4"; // FIXME what should this be? ++ } else if (A->getOption().matches(options::OPT_Ofast)) { ++ OptOStr = "2"; // FIXME what should this be? ++ } else if (A->getOption().matches(options::OPT_O0)) { ++ // intentionally do nothing ++ } else { ++ assert(A->getOption().matches(options::OPT_O) && "Must have a -O flag"); ++ StringRef S(A->getValue()); ++ if ((S == "s") || (S == "z")) { ++ // -Os = size; -Oz = more size ++ OptOStr = "2"; // FIXME -Os|-Oz => -opt ? ++ } else if ((S == "1") || (S == "2") || (S == "3")) { ++ OptOStr = S; ++ } else { ++ OptOStr = "4"; ++ } ++ } ++ } ++ unsigned OptLevel = std::stoi(OptOStr.str()); ++ ++ if (Args.hasArg(options::OPT_g_Group)) { ++ // pass -g to lower and upper ++ CommonCmdArgs.push_back("-debug"); ++ } ++ ++ /* Pick the last among conflicting flags, if a positive and negative flag ++ exists for ex. "-ffast-math -fno-fast-math" they get nullified. Also any ++ previously overwritten flag remains that way. ++ For ex. "-Kieee -ffast-math -fno-fast-math". -Kieee gets overwritten by ++ -ffast-math which then gets negated by -fno-fast-math, finally behaving as ++ if none of those flags were passed. 
++ */ ++ for(Arg *A: Args.filtered(options::OPT_ffast_math, options::OPT_fno_fast_math, ++ options::OPT_Ofast, options::OPT_Kieee_off, ++ options::OPT_Kieee_on, options::OPT_frelaxed_math, ++ options::OPT_fassociative_math, ++ options::OPT_fno_associative_math, ++ options::OPT_fsigned_zeros, ++ options::OPT_fno_signed_zeros)) { ++ if (A->getOption().matches(options::OPT_ffast_math) || ++ A->getOption().matches(options::OPT_Ofast)) { ++ NeedIEEE = NeedRelaxedMath = false; ++ NeedFastMath = true; ++ } else if (A->getOption().matches(options::OPT_Kieee_on)) { ++ NeedFastMath = NeedRelaxedMath = AssociativeMath = false; ++ NeedIEEE = SignedZeros = true; ++ } else if (A->getOption().matches(options::OPT_frelaxed_math)) { ++ NeedFastMath = NeedIEEE = false; ++ NeedRelaxedMath = true; ++ } else if (A->getOption().matches(options::OPT_fno_fast_math)) { ++ NeedFastMath = false; ++ } else if (A->getOption().matches(options::OPT_Kieee_off)) { ++ NeedIEEE = false; ++ } else if (A->getOption().matches(options::OPT_fassociative_math)) { ++ AssociativeMath = true; ++ NeedIEEE = SignedZeros = false; ++ } else if (A->getOption().matches(options::OPT_fno_associative_math)) { ++ AssociativeMath = false; ++ } else if (A->getOption().matches(options::OPT_fsigned_zeros)) { ++ SignedZeros = true; ++ AssociativeMath = false; ++ } else if (A->getOption().matches(options::OPT_fno_signed_zeros)) { ++ SignedZeros = NeedIEEE = false; ++ } ++ A->claim(); ++ } ++ ++ // fp-contract=fast is the default ++ bool EnableFPContraction = true; ++ if (Arg *A = Args.getLastArg(options::OPT_ffp_contract, ++ options::OPT_Mfma_on, ++ options::OPT_fma, ++ options::OPT_Mfma_off, ++ options::OPT_nofma)) { ++ auto Opt = A->getOption(); ++ if (Opt.matches(options::OPT_ffp_contract)) { ++ StringRef Val = A->getValue(); ++ if ((Val == "fast") || (Val == "on")) { ++ EnableFPContraction = true; ++ } else if (Val == "off") { ++ EnableFPContraction = false; ++ } else { ++ D.Diag(diag::err_drv_unsupported_option_argument) ++ << A->getOption().getName() << Val; ++ } ++ } else if(Opt.matches(options::OPT_Mfma_on) || ++ Opt.matches(options::OPT_fma)) { ++ EnableFPContraction = true; ++ } else { ++ EnableFPContraction = false; ++ } ++ } ++ ++ if(OptLevel == 0) ++ EnableFPContraction = false; ++ ++ // Emit contract math instructions. 
++ // Step 1 : Generate fma instructions in flang (can override with fma flag) ++ // Step 2 : Propagate fma contract information to LLVM to further ++ // exploit contraction opportunities ++ if (EnableFPContraction) { ++ LowerCmdArgs.push_back("-x"); ++ LowerCmdArgs.push_back("172"); ++ LowerCmdArgs.push_back("0x40000000"); ++ LowerCmdArgs.push_back("-x"); ++ LowerCmdArgs.push_back("179"); ++ LowerCmdArgs.push_back("1"); ++ // Step 2 ++ LowerCmdArgs.push_back("-x"); ++ LowerCmdArgs.push_back("216"); ++ LowerCmdArgs.push_back("0x1000"); ++ } else { ++ LowerCmdArgs.push_back("-x"); ++ LowerCmdArgs.push_back("171"); ++ LowerCmdArgs.push_back("0x40000000"); ++ LowerCmdArgs.push_back("-x"); ++ LowerCmdArgs.push_back("178"); ++ LowerCmdArgs.push_back("1"); ++ } ++ ++ if (NeedFastMath) { ++ // Lower: -x 216 1 ++ LowerCmdArgs.push_back("-x"); ++ LowerCmdArgs.push_back("216"); ++ LowerCmdArgs.push_back("1"); ++ // Common: -ieee 0 ++ CommonCmdArgs.push_back("-ieee"); ++ CommonCmdArgs.push_back("0"); ++ } else if (NeedIEEE) { ++ // Common: -y 129 2 ++ CommonCmdArgs.push_back("-y"); ++ CommonCmdArgs.push_back("129"); ++ CommonCmdArgs.push_back("2"); ++ // Lower: -x 6 0x100 ++ LowerCmdArgs.push_back("-x"); ++ LowerCmdArgs.push_back("6"); ++ LowerCmdArgs.push_back("0x100"); ++ // Lower: -x 42 0x400000 ++ LowerCmdArgs.push_back("-x"); ++ LowerCmdArgs.push_back("42"); ++ LowerCmdArgs.push_back("0x400000"); ++ // Lower: -y 129 4 ++ LowerCmdArgs.push_back("-y"); ++ LowerCmdArgs.push_back("129"); ++ LowerCmdArgs.push_back("4"); ++ // Lower: -x 129 0x400 ++ LowerCmdArgs.push_back("-x"); ++ LowerCmdArgs.push_back("129"); ++ LowerCmdArgs.push_back("0x400"); ++ // Lower: -y 216 1 (OPT_fno_fast_math) ++ LowerCmdArgs.push_back("-y"); ++ LowerCmdArgs.push_back("216"); ++ LowerCmdArgs.push_back("1"); ++ // Common: -ieee 1 ++ CommonCmdArgs.push_back("-ieee"); ++ CommonCmdArgs.push_back("1"); ++ } else if (NeedRelaxedMath) { ++ // Lower: -x 15 0x400 ++ LowerCmdArgs.push_back("-x"); ++ LowerCmdArgs.push_back("15"); ++ LowerCmdArgs.push_back("0x400"); ++ // Lower: -y 216 1 (OPT_fno_fast_math) ++ LowerCmdArgs.push_back("-y"); ++ LowerCmdArgs.push_back("216"); ++ LowerCmdArgs.push_back("1"); ++ // Common: -ieee 0 ++ CommonCmdArgs.push_back("-ieee"); ++ CommonCmdArgs.push_back("0"); ++ } else { ++ // Common: -ieee 0 ++ CommonCmdArgs.push_back("-ieee"); ++ CommonCmdArgs.push_back("0"); ++ } ++ ++ /***** Upper part of the Fortran frontend *****/ ++ ++ // TODO do we need to invoke this under GDB sometimes? 
++ const char *UpperExec = Args.MakeArgString(getToolChain().GetProgramPath("flang1")); ++ ++ UpperCmdArgs.push_back("-opt"); UpperCmdArgs.push_back(Args.MakeArgString(OptOStr)); ++ UpperCmdArgs.push_back("-terse"); UpperCmdArgs.push_back("1"); ++ UpperCmdArgs.push_back("-inform"); UpperCmdArgs.push_back("warn"); ++ UpperCmdArgs.push_back("-nohpf"); ++ UpperCmdArgs.push_back("-nostatic"); ++ UpperCmdArgs.append(CommonCmdArgs.begin(), CommonCmdArgs.end()); // Append common arguments ++ UpperCmdArgs.push_back("-x"); UpperCmdArgs.push_back("19"); UpperCmdArgs.push_back("0x400000"); ++ UpperCmdArgs.push_back("-quad"); ++ UpperCmdArgs.push_back("-x"); UpperCmdArgs.push_back("68"); UpperCmdArgs.push_back("0x1"); ++ UpperCmdArgs.push_back("-x"); UpperCmdArgs.push_back("59"); UpperCmdArgs.push_back("4"); ++ UpperCmdArgs.push_back("-x"); UpperCmdArgs.push_back("15"); UpperCmdArgs.push_back("2"); ++ UpperCmdArgs.push_back("-x"); UpperCmdArgs.push_back("49"); UpperCmdArgs.push_back("0x400004"); ++ UpperCmdArgs.push_back("-x"); UpperCmdArgs.push_back("51"); UpperCmdArgs.push_back("0x20"); ++ UpperCmdArgs.push_back("-x"); UpperCmdArgs.push_back("57"); UpperCmdArgs.push_back("0x4c"); ++ UpperCmdArgs.push_back("-x"); UpperCmdArgs.push_back("58"); UpperCmdArgs.push_back("0x10000"); ++ UpperCmdArgs.push_back("-x"); UpperCmdArgs.push_back("124"); UpperCmdArgs.push_back("0x1000"); ++ UpperCmdArgs.push_back("-tp"); UpperCmdArgs.push_back("px"); ++ UpperCmdArgs.push_back("-x"); UpperCmdArgs.push_back("57"); UpperCmdArgs.push_back("0xfb0000"); ++ UpperCmdArgs.push_back("-x"); UpperCmdArgs.push_back("58"); UpperCmdArgs.push_back("0x78031040"); ++ UpperCmdArgs.push_back("-x"); UpperCmdArgs.push_back("47"); UpperCmdArgs.push_back("0x08"); ++ UpperCmdArgs.push_back("-x"); UpperCmdArgs.push_back("48"); UpperCmdArgs.push_back("4608"); ++ UpperCmdArgs.push_back("-x"); UpperCmdArgs.push_back("49"); UpperCmdArgs.push_back("0x100"); ++ if (OptLevel >= 2) { ++ UpperCmdArgs.push_back("-x"); ++ UpperCmdArgs.push_back("70"); ++ UpperCmdArgs.push_back("0x6c00"); ++ UpperCmdArgs.push_back("-x"); ++ UpperCmdArgs.push_back("119"); ++ UpperCmdArgs.push_back("0x10000000"); ++ UpperCmdArgs.push_back("-x"); ++ UpperCmdArgs.push_back("129"); ++ UpperCmdArgs.push_back("2"); ++ UpperCmdArgs.push_back("-x"); ++ UpperCmdArgs.push_back("47"); ++ UpperCmdArgs.push_back("0x400000"); ++ UpperCmdArgs.push_back("-x"); ++ UpperCmdArgs.push_back("52"); ++ UpperCmdArgs.push_back("2"); ++ } ++ ++ // Add system include arguments. ++ getToolChain().AddFlangSystemIncludeArgs(Args, UpperCmdArgs); ++ ++ // Use clang's predefined macros ++ DiagnosticsEngine DE(new DiagnosticIDs(), new DiagnosticOptions, new IgnoringDiagConsumer()); ++ std::shared_ptr<clang::TargetOptions> TO = std::make_shared<clang::TargetOptions>(); ++ TO->Triple = Triple.getTriple(); ++ std::shared_ptr<TargetInfo> TI(clang::TargetInfo::CreateTargetInfo(DE, TO)); ++ std::string PredefineBuffer; ++ llvm::raw_string_ostream Predefines(PredefineBuffer); ++ ClassicFlangMacroBuilder Builder(UpperCmdArgs, Args, Predefines); ++ ++ LangOptions LO; ++ VersionTuple VT = getToolChain().computeMSVCVersion(&D, Args); ++ if (!VT.empty()) { ++ // Set the MSCompatibility version. Subminor version has 5 decimal digits. ++ // Minor and major versions have 2 decimal digits each. 
++ LO.MSCompatibilityVersion = VT.getMajor() * 10000000 + ++ VT.getMinor().value_or(0) * 100000 + ++ VT.getSubminor().value_or(0); ++ } ++ ++ // Define Target specific macros like __linux__ ++ TI->getTargetDefines(LO, Builder); ++ ++ Builder.defineMacro("__SIZE_TYPE__", ++ TargetInfo::getTypeName(TI->getSizeType())); ++ Builder.defineMacro( ++ "__PTRDIFF_TYPE__", ++ TargetInfo::getTypeName(TI->getPtrDiffType(LangAS::Default))); ++ ++ if (TI->getPointerWidth(LangAS::Default) == 64 && TI->getLongWidth() == 64 ++ && TI->getIntWidth() == 32) { ++ Builder.defineMacro("_LP64"); ++ Builder.defineMacro("__LP64__"); ++ } ++ ++ if (TI->getPointerWidth(LangAS::Default) == 32 && TI->getLongWidth() == 32 ++ && TI->getIntWidth() == 32) { ++ Builder.defineMacro("_ILP32"); ++ Builder.defineMacro("__ILP32__"); ++ } ++ ++ DefineTypeSize("__LONG_MAX__", TargetInfo::SignedLong, *TI, Builder); ++ ++ // Add additional predefined macros ++ switch (Triple.getArch()) { ++ case llvm::Triple::aarch64: ++ UpperCmdArgs.push_back("-def"); UpperCmdArgs.push_back("__ARM_ARCH__=8"); ++ break; ++ case llvm::Triple::x86_64: ++ UpperCmdArgs.push_back("-def"); UpperCmdArgs.push_back("__amd_64__amd64__"); ++ UpperCmdArgs.push_back("-def"); UpperCmdArgs.push_back("__k8"); ++ UpperCmdArgs.push_back("-def"); UpperCmdArgs.push_back("__k8__"); ++ break; ++ default: /* generic 64-bit */ ++ ; ++ } ++ UpperCmdArgs.push_back("-def"); UpperCmdArgs.push_back("__THROW="); ++ UpperCmdArgs.push_back("-def"); UpperCmdArgs.push_back("__extension__="); ++ UpperCmdArgs.push_back("-def"); UpperCmdArgs.push_back("__PGLLVM__"); ++ ++ // Enable preprocessor ++ if (Args.hasArg(options::OPT_Mpreprocess) || ++ Args.hasArg(options::OPT_cpp) || ++ Args.hasArg(options::OPT_E) || ++ types::getPreprocessedType(InputType) != types::TY_INVALID) { ++ UpperCmdArgs.push_back("-preprocess"); ++ for (auto Arg : Args.filtered(options::OPT_Mpreprocess, options::OPT_cpp, options::OPT_E)) { ++ Arg->claim(); ++ } ++ ++ // When -E option is provided, run only the fortran preprocessor. 
++ // Only in -E mode, consume -P if it exists ++ if (Args.hasArg(options::OPT_E)) { ++ UpperCmdArgs.push_back("-es"); ++ // Line marker mode is disabled ++ if (Args.hasArg(options::OPT_P)) { ++ Args.ClaimAllArgs(options::OPT_P); ++ } else { ++ // -pp enables line marker mode in fortran preprocessor ++ UpperCmdArgs.push_back("-pp"); ++ } ++ } ++ } ++ ++ // Enable standards checking ++ if (Args.hasArg(options::OPT_Mstandard)) { ++ UpperCmdArgs.push_back("-standard"); ++ for (auto Arg : Args.filtered(options::OPT_Mstandard)) { ++ Arg->claim(); ++ } ++ } ++ ++ // Free or fixed form file ++ if (Args.hasArg(options::OPT_fortran_format_Group)) { ++ // Override file name suffix, scan arguments for that ++ for (Arg *A : Args.filtered(options::OPT_fortran_format_Group)) { ++ A->claim(); ++ switch (A->getOption().getID()) { ++ default: ++ llvm_unreachable("missed a case"); ++ case options::OPT_ffixed_form: ++ case options::OPT_fno_free_form: ++ case options::OPT_Mfixed: ++ case options::OPT_Mfree_off: ++ case options::OPT_Mfreeform_off: ++ UpperCmdArgs.push_back("-nofreeform"); ++ break; ++ case options::OPT_ffree_form: ++ case options::OPT_fno_fixed_form: ++ case options::OPT_Mfree_on: ++ case options::OPT_Mfreeform_on: ++ UpperCmdArgs.push_back("-freeform"); ++ break; ++ } ++ } ++ } else { ++ // Deduce format from file name suffix ++ if (types::isFreeFormFortran(InputType)) { ++ UpperCmdArgs.push_back("-freeform"); ++ } else { ++ UpperCmdArgs.push_back("-nofreeform"); ++ } ++ } ++ ++ // Extend lines to 132 characters ++ for (auto Arg : Args.filtered(options::OPT_Mextend)) { ++ Arg->claim(); ++ UpperCmdArgs.push_back("-extend"); ++ } ++ ++ for (auto Arg : Args.filtered(options::OPT_ffixed_line_length_VALUE)) { ++ StringRef Value = Arg->getValue(); ++ if (Value == "72") { ++ Arg->claim(); ++ } else if (Value == "132") { ++ Arg->claim(); ++ UpperCmdArgs.push_back("-extend"); ++ } else { ++ D.Diag(diag::err_drv_unsupported_fixed_line_length) ++ << Arg->getAsString(Args); ++ } ++ } ++ ++ // Add user-defined include directories ++ for (auto Arg : Args.filtered(options::OPT_I)) { ++ Arg->claim(); ++ UpperCmdArgs.push_back("-idir"); ++ UpperCmdArgs.push_back(Arg->getValue(0)); ++ } ++ ++ // Add user-defined module directories ++ for (auto Arg : Args.filtered(options::OPT_ModuleDir, options::OPT_J)) { ++ Arg->claim(); ++ UpperCmdArgs.push_back("-moddir"); ++ UpperCmdArgs.push_back(Arg->getValue(0)); ++ } ++ ++ // "Define" preprocessor flags ++ for (auto Arg : Args.filtered(options::OPT_D)) { ++ Arg->claim(); ++ UpperCmdArgs.push_back("-def"); ++ UpperCmdArgs.push_back(Arg->getValue(0)); ++ } ++ ++ // "Undefine" preprocessor flags ++ for (auto Arg : Args.filtered(options::OPT_U)) { ++ Arg->claim(); ++ UpperCmdArgs.push_back("-undef"); ++ UpperCmdArgs.push_back(Arg->getValue(0)); ++ } ++ ++ UpperCmdArgs.push_back("-vect"); UpperCmdArgs.push_back("48"); ++ ++ // Semantics for assignments to allocatables ++ if (Arg *A = Args.getLastArg(options::OPT_Mallocatable_EQ)) { ++ // Argument is passed explicitly ++ StringRef Value = A->getValue(); ++ if (Value == "03") { // Enable Fortran 2003 semantics ++ UpperCmdArgs.push_back("-x"); // Set XBIT ++ } else if (Value == "95") { // Enable Fortran 95 semantics ++ UpperCmdArgs.push_back("-y"); // Unset XBIT ++ } else { ++ D.Diag(diag::err_drv_invalid_allocatable_mode) ++ << A->getAsString(Args); ++ } ++ } else { // No argument passed ++ UpperCmdArgs.push_back("-x"); // Default is 03 ++ } ++ UpperCmdArgs.push_back("54"); UpperCmdArgs.push_back("1"); // XBIT value ++ ++ 
UpperCmdArgs.push_back("-x"); UpperCmdArgs.push_back("70"); UpperCmdArgs.push_back("0x40000000"); ++ UpperCmdArgs.push_back("-y"); UpperCmdArgs.push_back("163"); UpperCmdArgs.push_back("0xc0000000"); ++ UpperCmdArgs.push_back("-x"); UpperCmdArgs.push_back("189"); UpperCmdArgs.push_back("0x10"); ++ ++ // Enable NULL pointer checking ++ if (Args.hasArg(options::OPT_Mchkptr)) { ++ UpperCmdArgs.push_back("-x"); ++ UpperCmdArgs.push_back("70"); ++ UpperCmdArgs.push_back("4"); ++ for (auto Arg : Args.filtered(options::OPT_Mchkptr)) { ++ Arg->claim(); ++ } ++ } ++ ++ // Set a -x flag for first part of Fortran frontend ++ for (Arg *A : Args.filtered(options::OPT_Hx_EQ)) { ++ A->claim(); ++ StringRef Value = A->getValue(); ++ auto XFlag = Value.split(","); ++ UpperCmdArgs.push_back("-x"); ++ UpperCmdArgs.push_back(Args.MakeArgString(XFlag.first)); ++ UpperCmdArgs.push_back(Args.MakeArgString(XFlag.second)); ++ } ++ ++ // Set a -y flag for first part of Fortran frontend ++ for (Arg *A : Args.filtered(options::OPT_Hy_EQ)) { ++ A->claim(); ++ StringRef Value = A->getValue(); ++ auto XFlag = Value.split(","); ++ UpperCmdArgs.push_back("-y"); ++ UpperCmdArgs.push_back(Args.MakeArgString(XFlag.first)); ++ UpperCmdArgs.push_back(Args.MakeArgString(XFlag.second)); ++ } ++ ++ // Set a -q (debug) flag for first part of Fortran frontend ++ for (Arg *A : Args.filtered(options::OPT_Hq_EQ)) { ++ A->claim(); ++ StringRef Value = A->getValue(); ++ auto XFlag = Value.split(","); ++ UpperCmdArgs.push_back("-q"); ++ UpperCmdArgs.push_back(Args.MakeArgString(XFlag.first)); ++ UpperCmdArgs.push_back(Args.MakeArgString(XFlag.second)); ++ } ++ ++ // Set a -qq (debug) flag for first part of Fortran frontend ++ for (Arg *A : Args.filtered(options::OPT_Hqq_EQ)) { ++ A->claim(); ++ StringRef Value = A->getValue(); ++ auto XFlag = Value.split(","); ++ UpperCmdArgs.push_back("-qq"); ++ UpperCmdArgs.push_back(Args.MakeArgString(XFlag.first)); ++ UpperCmdArgs.push_back(Args.MakeArgString(XFlag.second)); ++ } ++ ++ const char * STBFile = Args.MakeArgString(Stem + ".stb"); ++ C.addTempFile(STBFile); ++ UpperCmdArgs.push_back("-stbfile"); ++ UpperCmdArgs.push_back(STBFile); ++ ++ const char * ModuleExportFile = Args.MakeArgString(Stem + ".cmod"); ++ C.addTempFile(ModuleExportFile); ++ UpperCmdArgs.push_back("-modexport"); ++ UpperCmdArgs.push_back(ModuleExportFile); ++ ++ const char * ModuleIndexFile = Args.MakeArgString(Stem + ".cmdx"); ++ C.addTempFile(ModuleIndexFile); ++ UpperCmdArgs.push_back("-modindex"); ++ UpperCmdArgs.push_back(ModuleIndexFile); ++ ++ UpperCmdArgs.push_back("-output"); ++ UpperCmdArgs.push_back(ILMFile); ++ ++ SmallString<256> Path; ++ if(Args.getAllArgValues(options::OPT_fopenmp_targets_EQ).size() > 0) { ++ SmallString<128> TargetInfo; ++ Path = llvm::sys::path::parent_path(Output.getFilename()); ++ Arg* Tgts = Args.getLastArg(options::OPT_fopenmp_targets_EQ); ++ assert(Tgts && Tgts->getNumValues() && ++ "OpenMP offloading has to have targets specified."); ++ for (unsigned i = 0; i < Tgts->getNumValues(); ++i) { ++ if (i) ++ TargetInfo += ','; ++ llvm::Triple T(Tgts->getValue(i)); ++ TargetInfo += T.getTriple(); ++ } ++ UpperCmdArgs.push_back("-fopenmp-targets"); ++ UpperCmdArgs.push_back(Args.MakeArgString(TargetInfo.str())); ++ } ++ ++ C.addCommand(std::make_unique<Command>(JA, *this, ResponseFileSupport::AtFileUTF8(), UpperExec, UpperCmdArgs, Inputs)); ++ ++ // For -fsyntax-only or -E that is it ++ if (Args.hasArg(options::OPT_fsyntax_only) || ++ Args.hasArg(options::OPT_E)) return; ++ ++ /***** Lower 
part of Fortran frontend *****/ ++ ++ const char *LowerExec = Args.MakeArgString(getToolChain().GetProgramPath("flang2")); ++ ++ // TODO FLANG arg handling ++ LowerCmdArgs.push_back("-fn"); LowerCmdArgs.push_back(Input.getBaseInput()); ++ LowerCmdArgs.push_back("-opt"); LowerCmdArgs.push_back(Args.MakeArgString(OptOStr)); ++ LowerCmdArgs.push_back("-terse"); LowerCmdArgs.push_back("1"); ++ LowerCmdArgs.push_back("-inform"); LowerCmdArgs.push_back("warn"); ++ LowerCmdArgs.append(CommonCmdArgs.begin(), CommonCmdArgs.end()); // Append common arguments ++ LowerCmdArgs.push_back("-x"); LowerCmdArgs.push_back("68"); LowerCmdArgs.push_back("0x1"); ++ LowerCmdArgs.push_back("-x"); LowerCmdArgs.push_back("51"); LowerCmdArgs.push_back("0x20"); ++ LowerCmdArgs.push_back("-x"); LowerCmdArgs.push_back("119"); LowerCmdArgs.push_back("0xa10000"); ++ LowerCmdArgs.push_back("-x"); LowerCmdArgs.push_back("122"); LowerCmdArgs.push_back("0x40"); ++ LowerCmdArgs.push_back("-x"); LowerCmdArgs.push_back("123"); LowerCmdArgs.push_back("0x1000"); ++ LowerCmdArgs.push_back("-x"); LowerCmdArgs.push_back("127"); LowerCmdArgs.push_back("4"); ++ LowerCmdArgs.push_back("-x"); LowerCmdArgs.push_back("127"); LowerCmdArgs.push_back("17"); ++ LowerCmdArgs.push_back("-x"); LowerCmdArgs.push_back("19"); LowerCmdArgs.push_back("0x400000"); ++ LowerCmdArgs.push_back("-x"); LowerCmdArgs.push_back("28"); LowerCmdArgs.push_back("0x40000"); ++ LowerCmdArgs.push_back("-x"); LowerCmdArgs.push_back("120"); LowerCmdArgs.push_back("0x10000000"); ++ LowerCmdArgs.push_back("-x"); LowerCmdArgs.push_back("70"); LowerCmdArgs.push_back("0x8000"); ++ LowerCmdArgs.push_back("-x"); LowerCmdArgs.push_back("122"); LowerCmdArgs.push_back("1"); ++ LowerCmdArgs.push_back("-x"); LowerCmdArgs.push_back("125"); LowerCmdArgs.push_back("0x20000"); ++ LowerCmdArgs.push_back("-x"); LowerCmdArgs.push_back("164"); LowerCmdArgs.push_back("0x800000"); ++ LowerCmdArgs.push_back("-quad"); ++ LowerCmdArgs.push_back("-x"); LowerCmdArgs.push_back("59"); LowerCmdArgs.push_back("4"); ++ LowerCmdArgs.push_back("-tp"); LowerCmdArgs.push_back("px"); ++ LowerCmdArgs.push_back("-x"); LowerCmdArgs.push_back("120"); LowerCmdArgs.push_back("0x1000"); // debug lite ++ LowerCmdArgs.push_back("-x"); LowerCmdArgs.push_back("124"); LowerCmdArgs.push_back("0x1400"); ++ LowerCmdArgs.push_back("-y"); LowerCmdArgs.push_back("15"); LowerCmdArgs.push_back("2"); ++ LowerCmdArgs.push_back("-x"); LowerCmdArgs.push_back("57"); LowerCmdArgs.push_back("0x3b0000"); ++ LowerCmdArgs.push_back("-x"); LowerCmdArgs.push_back("58"); LowerCmdArgs.push_back("0x48000000"); ++ LowerCmdArgs.push_back("-x"); LowerCmdArgs.push_back("49"); LowerCmdArgs.push_back("0x100"); ++ LowerCmdArgs.push_back("-astype"); LowerCmdArgs.push_back("0"); ++ LowerCmdArgs.push_back("-x"); LowerCmdArgs.push_back("183"); LowerCmdArgs.push_back("4"); ++ LowerCmdArgs.push_back("-x"); LowerCmdArgs.push_back("121"); LowerCmdArgs.push_back("0x800"); ++ LowerCmdArgs.push_back("-x"); LowerCmdArgs.push_back("54"); LowerCmdArgs.push_back("0x10"); ++ LowerCmdArgs.push_back("-x"); LowerCmdArgs.push_back("70"); LowerCmdArgs.push_back("0x40000000"); ++ LowerCmdArgs.push_back("-x"); LowerCmdArgs.push_back("249"); LowerCmdArgs.push_back("170"); // LLVM version ++ LowerCmdArgs.push_back("-x"); LowerCmdArgs.push_back("124"); LowerCmdArgs.push_back("1"); ++ LowerCmdArgs.push_back("-y"); LowerCmdArgs.push_back("163"); LowerCmdArgs.push_back("0xc0000000"); ++ LowerCmdArgs.push_back("-x"); LowerCmdArgs.push_back("189"); 
LowerCmdArgs.push_back("0x10"); ++ LowerCmdArgs.push_back("-y"); LowerCmdArgs.push_back("189"); LowerCmdArgs.push_back("0x4000000"); ++ ++ if (!SignedZeros) { ++ LowerCmdArgs.push_back("-x"); ++ LowerCmdArgs.push_back("216"); ++ LowerCmdArgs.push_back("0x8"); ++ } ++ if (AssociativeMath) { ++ LowerCmdArgs.push_back("-x"); ++ LowerCmdArgs.push_back("216"); ++ LowerCmdArgs.push_back("0x10"); ++ } ++ ++ // Remove "noinline" attriblute ++ LowerCmdArgs.push_back("-x"); LowerCmdArgs.push_back("183"); LowerCmdArgs.push_back("0x10"); ++ ++ // Add target features ++ std::vector<StringRef> Features; ++ std::string FeatureList = ""; ++ getTargetFeatureList(D, Triple, Args, UpperCmdArgs, false, Features); ++ if (!Features.empty()) { ++ for (auto Feature : unifyTargetFeatures(Features)) { ++ if (!FeatureList.empty()) ++ FeatureList += ','; ++ FeatureList += Feature; ++ } ++ ++ LowerCmdArgs.push_back("-target_features"); ++ LowerCmdArgs.push_back(Args.MakeArgString(FeatureList)); ++ } ++ ++ // Add vscale range ++ unsigned vscaleMin = 0; ++ unsigned vscaleMax = 0; ++ bool hasSVE = false; ++ if (Arg *A = Args.getLastArg(options::OPT_msve_vector_bits_EQ)) { ++ StringRef Val = A->getValue(); ++ if (Val.equals("128") || Val.equals("256") || Val.equals("512") || ++ Val.equals("1024") || Val.equals("2048") || Val.equals("128+") || ++ Val.equals("256+") || Val.equals("512+") || Val.equals("1024+") || ++ Val.equals("2048+")) { ++ unsigned Bits = 0; ++ if (Val.endswith("+")) ++ Val = Val.substr(0, Val.size() - 1); ++ else { ++ bool Invalid = Val.getAsInteger(10, Bits); (void)Invalid; ++ assert(!Invalid && "Failed to parse value"); ++ vscaleMax = Bits / 128; ++ } ++ ++ bool Invalid = Val.getAsInteger(10, Bits); (void)Invalid; ++ assert(!Invalid && "Failed to parse value"); ++ vscaleMin = Bits / 128; ++ } else if (!Val.equals("scalable")) ++ getToolChain().getDriver().Diag(diag::warn_drv_clang_unsupported) ++ << A->getOption().getName() << Val; ++ } ++ for (auto Feature : unifyTargetFeatures(Features)) { ++ if (Feature.startswith("+sve")) { ++ hasSVE = true; ++ break; ++ } ++ } ++ if (vscaleMin || vscaleMax) { ++ LowerCmdArgs.push_back("-vscale_range_min"); ++ LowerCmdArgs.push_back(Args.MakeArgString( ++ std::to_string(vscaleMin ? 
vscaleMin : 1))); ++ LowerCmdArgs.push_back("-vscale_range_max"); ++ LowerCmdArgs.push_back(Args.MakeArgString(std::to_string(vscaleMax))); ++ } else { ++ if (hasSVE) { ++ LowerCmdArgs.push_back("-vscale_range_min"); ++ LowerCmdArgs.push_back(Args.MakeArgString(std::to_string(1))); ++ LowerCmdArgs.push_back("-vscale_range_max"); ++ LowerCmdArgs.push_back(Args.MakeArgString(std::to_string(16))); ++ } ++ } ++ ++ // Set a -x flag for second part of Fortran frontend ++ for (Arg *A : Args.filtered(options::OPT_Mx_EQ)) { ++ A->claim(); ++ StringRef Value = A->getValue(); ++ auto XFlag = Value.split(","); ++ LowerCmdArgs.push_back("-x"); ++ LowerCmdArgs.push_back(Args.MakeArgString(XFlag.first)); ++ LowerCmdArgs.push_back(Args.MakeArgString(XFlag.second)); ++ } ++ ++ // Set a -y flag for second part of Fortran frontend ++ for (Arg *A : Args.filtered(options::OPT_My_EQ)) { ++ A->claim(); ++ StringRef Value = A->getValue(); ++ auto XFlag = Value.split(","); ++ LowerCmdArgs.push_back("-y"); ++ LowerCmdArgs.push_back(Args.MakeArgString(XFlag.first)); ++ LowerCmdArgs.push_back(Args.MakeArgString(XFlag.second)); ++ } ++ ++ // Set a -q (debug) flag for second part of Fortran frontend ++ for (Arg *A : Args.filtered(options::OPT_Mq_EQ)) { ++ A->claim(); ++ StringRef Value = A->getValue(); ++ auto XFlag = Value.split(","); ++ LowerCmdArgs.push_back("-q"); ++ LowerCmdArgs.push_back(Args.MakeArgString(XFlag.first)); ++ LowerCmdArgs.push_back(Args.MakeArgString(XFlag.second)); ++ } ++ ++ // Set a -qq (debug) flag for second part of Fortran frontend ++ for (Arg *A : Args.filtered(options::OPT_Mqq_EQ)) { ++ A->claim(); ++ StringRef Value = A->getValue(); ++ auto XFlag = Value.split(","); ++ LowerCmdArgs.push_back("-qq"); ++ LowerCmdArgs.push_back(Args.MakeArgString(XFlag.first)); ++ LowerCmdArgs.push_back(Args.MakeArgString(XFlag.second)); ++ } ++ ++ // Pass an arbitrary flag for second part of Fortran frontend ++ for (Arg *A : Args.filtered(options::OPT_Wm_EQ)) { ++ A->claim(); ++ StringRef Value = A->getValue(); ++ SmallVector<StringRef, 8> PassArgs; ++ Value.split(PassArgs, StringRef(",")); ++ for (StringRef PassArg : PassArgs) { ++ LowerCmdArgs.push_back(Args.MakeArgString(PassArg)); ++ } ++ } ++ ++ LowerCmdArgs.push_back("-stbfile"); ++ LowerCmdArgs.push_back(STBFile); ++ ++ Path = llvm::sys::path::parent_path(Output.getFilename()); ++ bool IsOpenMPDevice = JA.isDeviceOffloading(Action::OFK_OpenMP); ++ ++ /* OpenMP GPU Offload */ ++ if(Args.getAllArgValues(options::OPT_fopenmp_targets_EQ).size() > 0) { ++ SmallString<128> TargetInfo;//("-fopenmp-targets "); ++ SmallString<256> TargetInfoAsm;//("-fopenmp-targets-asm "); ++ ++ Arg* Tgts = Args.getLastArg(options::OPT_fopenmp_targets_EQ); ++ assert(Tgts && Tgts->getNumValues() && ++ "OpenMP offloading has to have targets specified."); ++ for (unsigned i = 0; i < Tgts->getNumValues(); ++i) { ++ if (i) ++ TargetInfo += ','; ++ // We need to get the string from the triple because it may not be exactly ++ // the same as the one we get directly from the arguments. ++ llvm::Triple T(Tgts->getValue(i)); ++ TargetInfo += T.getTriple(); ++ // We also need to give an output file ++ TargetInfoAsm += Path; ++ TargetInfoAsm += "/"; ++ TargetInfoAsm += Stem; ++ TargetInfoAsm += "-"; ++ TargetInfoAsm += T.getTriple(); ++ TargetInfoAsm += ".ll"; ++ } ++ // The driver is aware that flang2 can generate multiple files at the same time. ++ // We mimic it here by exchanging the output files. ++ // The driver always uses the output file of -asm. 
++ LowerCmdArgs.push_back("-fopenmp-targets"); ++ LowerCmdArgs.push_back(Args.MakeArgString(TargetInfo.str())); ++ if(IsOpenMPDevice) { ++ LowerCmdArgs.push_back("-fopenmp-targets-asm"); ++ LowerCmdArgs.push_back(Args.MakeArgString(OutFile)); ++ LowerCmdArgs.push_back("-asm"); ++ LowerCmdArgs.push_back(Args.MakeArgString(TargetInfoAsm.str())); ++ } else { ++ LowerCmdArgs.push_back("-fopenmp-targets-asm"); ++ LowerCmdArgs.push_back(Args.MakeArgString(TargetInfoAsm.str())); ++ LowerCmdArgs.push_back("-asm"); ++ LowerCmdArgs.push_back(Args.MakeArgString(OutFile)); ++ } ++ } else { ++ LowerCmdArgs.push_back("-asm"); ++ LowerCmdArgs.push_back(Args.MakeArgString(OutFile)); ++ } ++ ++ bool IsWindowsMSVC = getToolChain().getTriple().isWindowsMSVCEnvironment(); ++ if (IsWindowsMSVC && !Args.hasArg(options::OPT_noFlangLibs)) { ++ getToolChain().AddFortranStdlibLibArgs(Args, LowerCmdArgs); ++ for (auto Arg : Args.filtered(options::OPT_noFlangLibs)) { ++ Arg->claim(); ++ } ++ } ++ ++ C.addCommand(std::make_unique<Command>(JA, *this, ResponseFileSupport::AtFileUTF8(), LowerExec, LowerCmdArgs, Inputs)); ++} ++ +diff --git a/clang/lib/Driver/ToolChains/ClassicFlang.h b/clang/lib/Driver/ToolChains/ClassicFlang.h +new file mode 100644 +index 000000000000..5864e0776601 +--- /dev/null ++++ b/clang/lib/Driver/ToolChains/ClassicFlang.h +@@ -0,0 +1,49 @@ ++//===--- ClassicFlang.h - Flang ToolChain Implementations -------*- C++ -*-===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++ ++#ifndef LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_ClassicFlang_H ++#define LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_ClassicFlang_H ++ ++#include "MSVC.h" ++#include "clang/Driver/Driver.h" ++#include "clang/Driver/Tool.h" ++#include "clang/Driver/Types.h" ++#include "llvm/Frontend/Debug/Options.h" ++#include "llvm/Option/Option.h" ++#include "llvm/Support/raw_ostream.h" ++#include "llvm/TargetParser/Triple.h" ++ ++namespace clang { ++namespace driver { ++ ++namespace tools { ++ ++/// \brief Flang Fortran frontend ++class LLVM_LIBRARY_VISIBILITY ClassicFlang : public Tool { ++public: ++ ClassicFlang(const ToolChain &TC) ++ : Tool("classic-flang", ++ "Fortran frontend to LLVM", TC) {} ++ ++ bool hasGoodDiagnostics() const override { return true; } ++ bool hasIntegratedAssembler() const override { return false; } ++ bool hasIntegratedCPP() const override { return true; } ++ ++ void ConstructJob(Compilation &C, const JobAction &JA, ++ const InputInfo &Output, const InputInfoList &Inputs, ++ const llvm::opt::ArgList &TCArgs, ++ const char *LinkingOutput) const override; ++}; ++ ++} // end namespace tools ++ ++} // end namespace driver ++} // end namespace clang ++ ++#endif // LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_ClassicFlang_H +diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp +index 0d6907b8e5c7..1ccc83a468ce 100644 +--- a/clang/lib/Driver/ToolChains/CommonArgs.cpp ++++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp +@@ -146,6 +146,20 @@ static bool shouldIgnoreUnsupportedTargetFeature(const Arg &TargetFeatureArg, + return TargetFeatureArg.getOption().matches(options::OPT_mno_cumode); + } + ++#ifdef ENABLE_CLASSIC_FLANG ++/// \brief Determine if Fortran "main" object is needed ++bool tools::needFortranMain(const Driver &D, const ArgList &Args) { ++ return (needFortranLibs(D, Args) && 
!Args.hasArg(options::OPT_Mnomain) && ++ !Args.hasArg(options::OPT_no_fortran_main)); ++} ++ ++/// \brief Determine if Fortran link libraries are needed ++bool tools::needFortranLibs(const Driver &D, const ArgList &Args) { ++ return (D.IsFlangMode() && !Args.hasArg(options::OPT_nostdlib) && ++ !Args.hasArg(options::OPT_noFlangLibs)); ++} ++#endif ++ + void tools::addPathIfExists(const Driver &D, const Twine &Path, + ToolChain::path_list &Paths) { + if (D.getVFS().exists(Path)) +@@ -256,6 +270,9 @@ void tools::AddLinkerInputs(const ToolChain &TC, const InputInfoList &Inputs, + const ArgList &Args, ArgStringList &CmdArgs, + const JobAction &JA) { + const Driver &D = TC.getDriver(); ++#ifdef ENABLE_CLASSIC_FLANG ++ bool SeenFirstLinkerInput = false; ++#endif + + // Add extra linker input arguments which are not treated as inputs + // (constructed via -Xarch_). +@@ -289,6 +306,15 @@ void tools::AddLinkerInputs(const ToolChain &TC, const InputInfoList &Inputs, + if (II.isNothing()) + continue; + ++#ifdef ENABLE_CLASSIC_FLANG ++ // Add Fortran "main" before the first linker input ++ if (!SeenFirstLinkerInput) { ++ if (needFortranMain(D, Args)) { ++ CmdArgs.push_back("-lflangmain"); ++ } ++ SeenFirstLinkerInput = true; ++ } ++#endif + // Otherwise, this is a linker input argument. + const Arg &A = II.getInputArg(); + +@@ -300,6 +326,16 @@ void tools::AddLinkerInputs(const ToolChain &TC, const InputInfoList &Inputs, + else + A.renderAsInput(Args, CmdArgs); + } ++#ifdef ENABLE_CLASSIC_FLANG ++ if (!SeenFirstLinkerInput && needFortranMain(D, Args)) { ++ CmdArgs.push_back("-lflangmain"); ++ } ++ ++ // Claim "no Fortran main" arguments ++ for (auto Arg : Args.filtered(options::OPT_no_fortran_main, options::OPT_Mnomain)) { ++ Arg->claim(); ++ } ++#endif + } + + void tools::addLinkerCompressDebugSectionsOption( +@@ -489,10 +525,18 @@ static void getWebAssemblyTargetFeatures(const Driver &D, + options::OPT_m_wasm_Features_Group); + } + ++#ifndef ENABLE_CLASSIC_FLANG + void tools::getTargetFeatures(const Driver &D, const llvm::Triple &Triple, + const ArgList &Args, ArgStringList &CmdArgs, + bool ForAS, bool IsAux) { + std::vector<StringRef> Features; ++#else ++void tools::getTargetFeatureList(const Driver &D, ++ const llvm::Triple &Triple, ++ const ArgList &Args, ArgStringList &CmdArgs, ++ bool ForAS, ++ std::vector<StringRef> &Features) { ++#endif + switch (Triple.getArch()) { + default: + break; +@@ -567,6 +611,15 @@ void tools::getTargetFeatures(const Driver &D, const llvm::Triple &Triple, + loongarch::getLoongArchTargetFeatures(D, Triple, Args, Features); + break; + } ++#ifdef ENABLE_CLASSIC_FLANG ++} ++ ++void tools::getTargetFeatures(const Driver &D, const llvm::Triple &Triple, ++ const ArgList &Args, ArgStringList &CmdArgs, ++ bool ForAS, bool IsAux) { ++ std::vector<StringRef> Features; ++ getTargetFeatureList(D, Triple, Args, CmdArgs, ForAS, Features); ++#endif + + for (auto Feature : unifyTargetFeatures(Features)) { + CmdArgs.push_back(IsAux ? 
"-aux-target-feature" : "-target-feature"); +@@ -889,7 +942,11 @@ bool tools::addOpenMPRuntime(ArgStringList &CmdArgs, const ToolChain &TC, + const ArgList &Args, bool ForceStaticHostRuntime, + bool IsOffloadingHost, bool GompNeedsRT) { + if (!Args.hasFlag(options::OPT_fopenmp, options::OPT_fopenmp_EQ, +- options::OPT_fno_openmp, false)) ++ options::OPT_fno_openmp, false) ++#ifdef ENABLE_CLASSIC_FLANG ++ && !Args.hasFlag(options::OPT_mp, options::OPT_nomp, false) ++#endif ++ ) + return false; + + Driver::OpenMPRuntimeKind RTKind = TC.getDriver().getOpenMPRuntime(Args); +@@ -934,7 +991,16 @@ bool tools::addOpenMPRuntime(ArgStringList &CmdArgs, const ToolChain &TC, + } + + void tools::addFortranRuntimeLibs(const ToolChain &TC, ++#ifdef ENABLE_CLASSIC_FLANG ++ const llvm::opt::ArgList &Args, ++#endif + llvm::opt::ArgStringList &CmdArgs) { ++#ifdef ENABLE_CLASSIC_FLANG ++ if (needFortranLibs(TC.getDriver(), Args)) ++ TC.AddFortranStdlibLibArgs(Args, CmdArgs); ++ else ++ Args.ClaimAllArgs(options::OPT_noFlangLibs); ++#else + if (TC.getTriple().isKnownWindowsMSVCEnvironment()) { + CmdArgs.push_back("Fortran_main.lib"); + CmdArgs.push_back("FortranRuntime.lib"); +@@ -944,6 +1010,7 @@ void tools::addFortranRuntimeLibs(const ToolChain &TC, + CmdArgs.push_back("-lFortranRuntime"); + CmdArgs.push_back("-lFortranDecimal"); + } ++#endif + } + + void tools::addFortranRuntimeLibraryPath(const ToolChain &TC, +diff --git a/clang/lib/Driver/ToolChains/CommonArgs.h b/clang/lib/Driver/ToolChains/CommonArgs.h +index 6a8de0f1c36d..bd5cb1bb866e 100644 +--- a/clang/lib/Driver/ToolChains/CommonArgs.h ++++ b/clang/lib/Driver/ToolChains/CommonArgs.h +@@ -23,6 +23,12 @@ namespace clang { + namespace driver { + namespace tools { + ++#ifdef ENABLE_CLASSIC_FLANG ++bool needFortranLibs(const Driver &D, const llvm::opt::ArgList &Args); ++ ++bool needFortranMain(const Driver &D, const llvm::opt::ArgList &Args); ++#endif ++ + void addPathIfExists(const Driver &D, const Twine &Path, + ToolChain::path_list &Paths); + +@@ -131,6 +137,9 @@ bool addOpenMPRuntime(llvm::opt::ArgStringList &CmdArgs, const ToolChain &TC, + + /// Adds Fortran runtime libraries to \p CmdArgs. + void addFortranRuntimeLibs(const ToolChain &TC, ++#ifdef ENABLE_CLASSIC_FLANG ++ const llvm::opt::ArgList &Args, ++#endif + llvm::opt::ArgStringList &CmdArgs); + + /// Adds the path for the Fortran runtime libraries to \p CmdArgs. +@@ -173,6 +182,17 @@ void AddTargetFeature(const llvm::opt::ArgList &Args, + std::string getCPUName(const Driver &D, const llvm::opt::ArgList &Args, + const llvm::Triple &T, bool FromAs = false); + ++#ifdef ENABLE_CLASSIC_FLANG ++// Helper function extracted from upstream getTargetFeatures. Classic Flang ++// uses this helper to render the target feature options for the Fortran ++// frontend. 
++void getTargetFeatureList(const Driver &D, ++ const llvm::Triple &Triple, ++ const llvm::opt::ArgList &Args, ++ llvm::opt::ArgStringList &CmdArgs, ++ bool ForAS, std::vector<StringRef> &Features); ++#endif ++ + void getTargetFeatures(const Driver &D, const llvm::Triple &Triple, + const llvm::opt::ArgList &Args, + llvm::opt::ArgStringList &CmdArgs, bool ForAS, +diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp +index 3a577650eb08..36fddca2d177 100644 +--- a/clang/lib/Driver/ToolChains/Cuda.cpp ++++ b/clang/lib/Driver/ToolChains/Cuda.cpp +@@ -1034,3 +1034,38 @@ VersionTuple CudaToolChain::computeMSVCVersion(const Driver *D, + const ArgList &Args) const { + return HostTC.computeMSVCVersion(D, Args); + } ++ ++#ifdef ENABLE_CLASSIC_FLANG ++static void AddFlangSysIncludeArg(const ArgList &DriverArgs, ++ ArgStringList &Flang1Args, ++ ToolChain::path_list IncludePathList) { ++ std::string ArgValue; // Path argument value ++ ++ // Make up argument value consisting of paths separated by colons ++ bool first = true; ++ for (auto P : IncludePathList) { ++ if (first) { ++ first = false; ++ } else { ++ ArgValue += ":"; ++ } ++ ArgValue += P; ++ } ++ ++ // Add the argument ++ Flang1Args.push_back("-stdinc"); ++ Flang1Args.push_back(DriverArgs.MakeArgString(ArgValue)); ++} ++ ++void CudaToolChain::AddFlangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs, ++ llvm::opt::ArgStringList &Flang1Args) const { ++ path_list IncludePathList; ++ const Driver &D = getDriver(); ++ if (DriverArgs.hasArg(options::OPT_nostdinc)) ++ return; ++ SmallString<128> P(D.InstalledDir); ++ llvm::sys::path::append(P, "../include"); ++ IncludePathList.push_back(P.c_str()); ++ AddFlangSysIncludeArg(DriverArgs, Flang1Args, IncludePathList); ++} ++#endif +diff --git a/clang/lib/Driver/ToolChains/Cuda.h b/clang/lib/Driver/ToolChains/Cuda.h +index 39df6e06fb26..3eded1f53e42 100644 +--- a/clang/lib/Driver/ToolChains/Cuda.h ++++ b/clang/lib/Driver/ToolChains/Cuda.h +@@ -214,6 +214,11 @@ public: + llvm::opt::ArgStringList &CC1Args) const override; + void AddIAMCUIncludeArgs(const llvm::opt::ArgList &DriverArgs, + llvm::opt::ArgStringList &CC1Args) const override; ++#ifdef ENABLE_CLASSIC_FLANG ++ void ++ AddFlangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs, ++ llvm::opt::ArgStringList &Flang1Args) const override; ++#endif + + SanitizerMask getSupportedSanitizers() const override; + +diff --git a/clang/lib/Driver/ToolChains/Darwin.cpp b/clang/lib/Driver/ToolChains/Darwin.cpp +index 65bd6c6a7eb3..3b8e4d7e133a 100644 +--- a/clang/lib/Driver/ToolChains/Darwin.cpp ++++ b/clang/lib/Driver/ToolChains/Darwin.cpp +@@ -679,7 +679,11 @@ void darwin::Linker::ConstructJob(Compilation &C, const JobAction &JA, + // to generate executables. + if (getToolChain().getDriver().IsFlangMode()) { + addFortranRuntimeLibraryPath(getToolChain(), Args, CmdArgs); ++#ifdef ENABLE_CLASSIC_FLANG ++ addFortranRuntimeLibs(getToolChain(), Args, CmdArgs); ++#else + addFortranRuntimeLibs(getToolChain(), CmdArgs); ++#endif + } + + if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) +diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp +index 0bb5433a658a..076a2d4a21d8 100644 +--- a/clang/lib/Driver/ToolChains/Gnu.cpp ++++ b/clang/lib/Driver/ToolChains/Gnu.cpp +@@ -578,7 +578,11 @@ void tools::gnutools::Linker::ConstructJob(Compilation &C, const JobAction &JA, + // AddRuntTimeLibs). 
+ if (D.IsFlangMode()) { + addFortranRuntimeLibraryPath(ToolChain, Args, CmdArgs); ++#ifdef ENABLE_CLASSIC_FLANG ++ addFortranRuntimeLibs(ToolChain, Args, CmdArgs); ++#else + addFortranRuntimeLibs(ToolChain, CmdArgs); ++#endif + CmdArgs.push_back("-lm"); + } + +diff --git a/clang/lib/Driver/ToolChains/Linux.cpp b/clang/lib/Driver/ToolChains/Linux.cpp +index 1ba222bf83b1..7f0714676a9d 100644 +--- a/clang/lib/Driver/ToolChains/Linux.cpp ++++ b/clang/lib/Driver/ToolChains/Linux.cpp +@@ -608,6 +608,196 @@ std::string Linux::getDynamicLinker(const ArgList &Args) const { + return "/" + LibDir + "/" + Loader; + } + ++#ifdef ENABLE_CLASSIC_FLANG ++/// Convert path list to Fortran frontend argument ++static void AddFlangSysIncludeArg(const ArgList &DriverArgs, ++ ArgStringList &Flang1Args, ++ ToolChain::path_list IncludePathList) { ++ std::string ArgValue; // Path argument value ++ ++ // Make up argument value consisting of paths separated by colons ++ bool first = true; ++ for (auto P : IncludePathList) { ++ if (first) { ++ first = false; ++ } else { ++ ArgValue += ":"; ++ } ++ ArgValue += P; ++ } ++ ++ // Add the argument ++ Flang1Args.push_back("-stdinc"); ++ Flang1Args.push_back(DriverArgs.MakeArgString(ArgValue)); ++} ++ ++void Linux::AddFlangSystemIncludeArgs(const ArgList &DriverArgs, ++ ArgStringList &Flang1Args) const { ++ path_list IncludePathList; ++ const Driver &D = getDriver(); ++ std::string SysRoot = computeSysRoot(); ++ ++ if (DriverArgs.hasArg(options::OPT_nostdinc)) ++ return; ++ ++ { ++ SmallString<128> P(D.InstalledDir); ++ llvm::sys::path::append(P, "../include"); ++ IncludePathList.push_back(P.c_str()); ++ } ++ ++ if (!DriverArgs.hasArg(options::OPT_nostdlibinc)) ++ IncludePathList.push_back(SysRoot + "/usr/local/include"); ++ ++ if (!DriverArgs.hasArg(options::OPT_nobuiltininc)) { ++ SmallString<128> P(D.ResourceDir); ++ llvm::sys::path::append(P, "include"); ++ IncludePathList.push_back(P.c_str()); ++ } ++ ++ if (DriverArgs.hasArg(options::OPT_nostdlibinc)) { ++ AddFlangSysIncludeArg(DriverArgs, Flang1Args, IncludePathList); ++ return; ++ } ++ ++ // Check for configure-time C include directories. ++ StringRef CIncludeDirs(C_INCLUDE_DIRS); ++ if (CIncludeDirs != "") { ++ SmallVector<StringRef, 5> dirs; ++ CIncludeDirs.split(dirs, ":"); ++ for (StringRef dir : dirs) { ++ StringRef Prefix = ++ llvm::sys::path::is_absolute(dir) ? StringRef(SysRoot) : ""; ++ IncludePathList.push_back(Prefix.str() + dir.str()); ++ } ++ AddFlangSysIncludeArg(DriverArgs, Flang1Args, IncludePathList); ++ return; ++ } ++ ++ // Lacking those, try to detect the correct set of system includes for the ++ // target triple. ++ ++ // Add include directories specific to the selected multilib set and multilib. ++ if (GCCInstallation.isValid()) { ++ const auto &Callback = Multilibs.includeDirsCallback(); ++ if (Callback) { ++ for (const auto &Path : Callback(GCCInstallation.getMultilib())) ++ addExternCSystemIncludeIfExists( ++ DriverArgs, Flang1Args, GCCInstallation.getInstallPath() + Path); ++ } ++ } ++ ++ // Implement generic Debian multiarch support. ++ const StringRef X86_64MultiarchIncludeDirs = { ++ "/usr/include/x86_64-linux-gnu", ++ ++ // FIXME: These are older forms of multiarch. It's not clear that they're ++ // in use in any released version of Debian, so we should consider ++ // removing them. ++ "/usr/include/i686-linux-gnu/64", "/usr/include/i486-linux-gnu/64"}; ++ const StringRef X86MultiarchIncludeDirs = { ++ "/usr/include/i386-linux-gnu", ++ ++ // FIXME: These are older forms of multiarch. 
It's not clear that they're ++ // in use in any released version of Debian, so we should consider ++ // removing them. ++ "/usr/include/x86_64-linux-gnu/32", "/usr/include/i686-linux-gnu", ++ "/usr/include/i486-linux-gnu"}; ++ const StringRef AArch64MultiarchIncludeDirs = { ++ "/usr/include/aarch64-linux-gnu"}; ++ const StringRef ARMMultiarchIncludeDirs = { ++ "/usr/include/arm-linux-gnueabi"}; ++ const StringRef ARMHFMultiarchIncludeDirs = { ++ "/usr/include/arm-linux-gnueabihf"}; ++ const StringRef MIPSMultiarchIncludeDirs = {"/usr/include/mips-linux-gnu"}; ++ const StringRef MIPSELMultiarchIncludeDirs = { ++ "/usr/include/mipsel-linux-gnu"}; ++ const StringRef MIPS64MultiarchIncludeDirs = { ++ "/usr/include/mips64-linux-gnu", "/usr/include/mips64-linux-gnuabi64"}; ++ const StringRef MIPS64ELMultiarchIncludeDirs = { ++ "/usr/include/mips64el-linux-gnu", ++ "/usr/include/mips64el-linux-gnuabi64"}; ++ const StringRef PPCMultiarchIncludeDirs = { ++ "/usr/include/powerpc-linux-gnu"}; ++ const StringRef PPC64MultiarchIncludeDirs = { ++ "/usr/include/powerpc64-linux-gnu"}; ++ const StringRef PPC64LEMultiarchIncludeDirs = { ++ "/usr/include/powerpc64le-linux-gnu"}; ++ const StringRef SparcMultiarchIncludeDirs = { ++ "/usr/include/sparc-linux-gnu"}; ++ const StringRef Sparc64MultiarchIncludeDirs = { ++ "/usr/include/sparc64-linux-gnu"}; ++ ArrayRef<StringRef> MultiarchIncludeDirs; ++ switch (getTriple().getArch()) { ++ case llvm::Triple::x86_64: ++ MultiarchIncludeDirs = X86_64MultiarchIncludeDirs; ++ break; ++ case llvm::Triple::x86: ++ MultiarchIncludeDirs = X86MultiarchIncludeDirs; ++ break; ++ case llvm::Triple::aarch64: ++ case llvm::Triple::aarch64_be: ++ MultiarchIncludeDirs = AArch64MultiarchIncludeDirs; ++ break; ++ case llvm::Triple::arm: ++ if (getTriple().getEnvironment() == llvm::Triple::GNUEABIHF) ++ MultiarchIncludeDirs = ARMHFMultiarchIncludeDirs; ++ else ++ MultiarchIncludeDirs = ARMMultiarchIncludeDirs; ++ break; ++ case llvm::Triple::mips: ++ MultiarchIncludeDirs = MIPSMultiarchIncludeDirs; ++ break; ++ case llvm::Triple::mipsel: ++ MultiarchIncludeDirs = MIPSELMultiarchIncludeDirs; ++ break; ++ case llvm::Triple::mips64: ++ MultiarchIncludeDirs = MIPS64MultiarchIncludeDirs; ++ break; ++ case llvm::Triple::mips64el: ++ MultiarchIncludeDirs = MIPS64ELMultiarchIncludeDirs; ++ break; ++ case llvm::Triple::ppc: ++ MultiarchIncludeDirs = PPCMultiarchIncludeDirs; ++ break; ++ case llvm::Triple::ppc64: ++ MultiarchIncludeDirs = PPC64MultiarchIncludeDirs; ++ break; ++ case llvm::Triple::ppc64le: ++ MultiarchIncludeDirs = PPC64LEMultiarchIncludeDirs; ++ break; ++ case llvm::Triple::sparc: ++ MultiarchIncludeDirs = SparcMultiarchIncludeDirs; ++ break; ++ case llvm::Triple::sparcv9: ++ MultiarchIncludeDirs = Sparc64MultiarchIncludeDirs; ++ break; ++ default: ++ break; ++ } ++ for (StringRef Dir : MultiarchIncludeDirs) { ++ if (llvm::sys::fs::exists(SysRoot + Dir)) { ++ IncludePathList.push_back(SysRoot + Dir.str()); ++ break; ++ } ++ } ++ ++ if (getTriple().getOS() == llvm::Triple::RTEMS) { ++ AddFlangSysIncludeArg(DriverArgs, Flang1Args, IncludePathList); ++ return; ++ } ++ ++ // Add an include of '/include' directly. This isn't provided by default by ++ // system GCCs, but is often used with cross-compiling GCCs, and harmless to ++ // add even when Clang is acting as-if it were a system compiler. 
++ IncludePathList.push_back(SysRoot + "/include"); ++ ++ IncludePathList.push_back(SysRoot + "/usr/include"); ++ ++ AddFlangSysIncludeArg(DriverArgs, Flang1Args, IncludePathList); ++} ++#endif ++ + void Linux::AddClangSystemIncludeArgs(const ArgList &DriverArgs, + ArgStringList &CC1Args) const { + const Driver &D = getDriver(); +diff --git a/clang/lib/Driver/ToolChains/Linux.h b/clang/lib/Driver/ToolChains/Linux.h +index 524391743090..b6db4160811d 100644 +--- a/clang/lib/Driver/ToolChains/Linux.h ++++ b/clang/lib/Driver/ToolChains/Linux.h +@@ -27,6 +27,11 @@ public: + const llvm::Triple &TargetTriple, + StringRef SysRoot) const override; + ++#ifdef ENABLE_CLASSIC_FLANG ++ void ++ AddFlangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs, ++ llvm::opt::ArgStringList &Flang1Args) const override; ++#endif + void + AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs, + llvm::opt::ArgStringList &CC1Args) const override; +diff --git a/clang/lib/Driver/ToolChains/MSVC.cpp b/clang/lib/Driver/ToolChains/MSVC.cpp +index a9fe9da4620f..1562e1a4b8eb 100644 +--- a/clang/lib/Driver/ToolChains/MSVC.cpp ++++ b/clang/lib/Driver/ToolChains/MSVC.cpp +@@ -131,7 +131,11 @@ void visualstudio::Linker::ConstructJob(Compilation &C, const JobAction &JA, + + if (C.getDriver().IsFlangMode()) { + addFortranRuntimeLibraryPath(TC, Args, CmdArgs); ++#ifdef ENABLE_CLASSIC_FLANG ++ addFortranRuntimeLibs(TC, Args, CmdArgs); ++#else + addFortranRuntimeLibs(TC, CmdArgs); ++#endif + + // Inform the MSVC linker that we're generating a console application, i.e. + // one with `main` as the "user-defined" entry point. The `main` function is +@@ -263,6 +267,13 @@ void visualstudio::Linker::ConstructJob(Compilation &C, const JobAction &JA, + } + } + ++#ifdef ENABLE_CLASSIC_FLANG ++ if (C.getDriver().IsFlangMode()) { ++ CmdArgs.push_back(Args.MakeArgString(std::string("-libpath:") + ++ TC.getDriver().Dir + "/../lib")); ++ } ++#endif ++ + // Add compiler-rt lib in case if it was explicitly + // specified as an argument for --rtlib option. 
+ if (!Args.hasArg(options::OPT_nostdlib)) { +@@ -510,6 +521,74 @@ void MSVCToolChain::AddHIPRuntimeLibArgs(const ArgList &Args, + "amdhip64.lib"}); + } + ++#ifdef ENABLE_CLASSIC_FLANG ++void MSVCToolChain::AddFortranStdlibLibArgs(const ArgList &Args, ++ ArgStringList &CmdArgs) const { ++ bool staticFlangLibs = false; ++ bool useOpenMP = false; ++ ++ if (Args.hasArg(options::OPT_staticFlangLibs)) { ++ for (auto *A: Args.filtered(options::OPT_staticFlangLibs)) { ++ A->claim(); ++ staticFlangLibs = true; ++ } ++ } ++ ++ Arg *A = Args.getLastArg(options::OPT_mp, options::OPT_nomp, ++ options::OPT_fopenmp, options::OPT_fno_openmp); ++ if (A && ++ (A->getOption().matches(options::OPT_mp) || ++ A->getOption().matches(options::OPT_fopenmp))) { ++ useOpenMP = true; ++ } ++ ++ if (needFortranMain(getDriver(), Args)) { ++ // flangmain is always static ++ CmdArgs.push_back("-linker"); ++ CmdArgs.push_back("/subsystem:console"); ++ CmdArgs.push_back("-linker"); ++ CmdArgs.push_back("/defaultlib:flangmain.lib"); ++ } ++ ++ if (staticFlangLibs) { ++ CmdArgs.push_back("-linker"); ++ CmdArgs.push_back("/defaultlib:libflang.lib"); ++ CmdArgs.push_back("-linker"); ++ CmdArgs.push_back("/defaultlib:libflangrti.lib"); ++ CmdArgs.push_back("-linker"); ++ CmdArgs.push_back("/defaultlib:libpgmath.lib"); ++ } else { ++ CmdArgs.push_back("-linker"); ++ CmdArgs.push_back("/defaultlib:flang.lib"); ++ CmdArgs.push_back("-linker"); ++ CmdArgs.push_back("/defaultlib:flangrti.lib"); ++ CmdArgs.push_back("-linker"); ++ CmdArgs.push_back("/defaultlib:pgmath.lib"); ++ } ++ if (useOpenMP) { ++ CmdArgs.push_back("-linker"); ++ CmdArgs.push_back("/nodefaultlib:vcomp.lib"); ++ CmdArgs.push_back("-linker"); ++ CmdArgs.push_back("/nodefaultlib:vcompd.lib"); ++ CmdArgs.push_back("-linker"); ++ CmdArgs.push_back("/defaultlib:libomp.lib"); ++ } ++ ++ // Always link Fortran executables with Pthreads ++ // CmdArgs.push_back("-lpthread"); ++ ++ // These options are added by clang-cl in Clang.cpp for C/C++ ++ // In clang-cl.exe -MD and -MT control these options, but in ++ // flang.exe like clang.exe these are different options for ++ // dependency tracking. Let's assume that if somebody needs ++ // static flang libs, they don't need static C runtime libs. 
++ // FIXME: Use LLVM_USE_CRT_<CMAKE_BUILD_TYPE> variable ++ // to use libcmt.lib or msvcrt.lib ++ CmdArgs.push_back("-linker"); ++ CmdArgs.push_back("/defaultlib:libcmt.lib"); ++} ++#endif ++ + void MSVCToolChain::printVerboseInfo(raw_ostream &OS) const { + CudaInstallation.print(OS); + RocmInstallation.print(OS); +@@ -634,6 +713,42 @@ void MSVCToolChain::AddSystemIncludeWithSubfolder( + addSystemInclude(DriverArgs, CC1Args, path); + } + ++#ifdef ENABLE_CLASSIC_FLANG ++/// Convert path list to Fortran frontend argument ++static void AddFlangSysIncludeArg(const ArgList &DriverArgs, ++ ArgStringList &Flang1Args, ++ ToolChain::path_list IncludePathList) { ++ std::string ArgValue; // Path argument value ++ ++ // Make up argument value consisting of paths separated by semicolons ++ bool first = true; ++ for (auto P : IncludePathList) { ++ if (first) { ++ first = false; ++ } else { ++ ArgValue += ";"; ++ } ++ ArgValue += P; ++ } ++ ++ // Add the argument ++ Flang1Args.push_back("-stdinc"); ++ Flang1Args.push_back(DriverArgs.MakeArgString(ArgValue)); ++} ++ ++void MSVCToolChain::AddFlangSystemIncludeArgs(const ArgList &DriverArgs, ++ ArgStringList &Flang1Args) const { ++ path_list IncludePathList; ++ const Driver &D = getDriver(); ++ if (DriverArgs.hasArg(options::OPT_nostdinc)) ++ return; ++ SmallString<128> P(D.InstalledDir); ++ llvm::sys::path::append(P, "../include"); ++ IncludePathList.push_back(P.c_str()); ++ AddFlangSysIncludeArg(DriverArgs, Flang1Args, IncludePathList); ++} ++#endif ++ + void MSVCToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs, + ArgStringList &CC1Args) const { + if (DriverArgs.hasArg(options::OPT_nostdinc)) +diff --git a/clang/lib/Driver/ToolChains/MSVC.h b/clang/lib/Driver/ToolChains/MSVC.h +index 0f687bc70ae4..46d5af9d114f 100644 +--- a/clang/lib/Driver/ToolChains/MSVC.h ++++ b/clang/lib/Driver/ToolChains/MSVC.h +@@ -84,6 +84,12 @@ public: + return VSLayout == llvm::ToolsetLayout::VS2017OrNewer; + } + ++#ifdef ENABLE_CLASSIC_FLANG ++ void ++ AddFlangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs, ++ llvm::opt::ArgStringList &Flang1Args) const override; ++#endif ++ + void + AddClangSystemIncludeArgs(const llvm::opt::ArgList &DriverArgs, + llvm::opt::ArgStringList &CC1Args) const override; +@@ -100,6 +106,11 @@ public: + void AddHIPRuntimeLibArgs(const llvm::opt::ArgList &Args, + llvm::opt::ArgStringList &CmdArgs) const override; + ++#ifdef ENABLE_CLASSIC_FLANG ++ void AddFortranStdlibLibArgs(const llvm::opt::ArgList &Args, ++ llvm::opt::ArgStringList &CmdArgs) const override; ++#endif ++ + bool getWindowsSDKLibraryPath( + const llvm::opt::ArgList &Args, std::string &path) const; + bool getUniversalCRTLibraryPath(const llvm::opt::ArgList &Args, +diff --git a/clang/lib/Driver/ToolChains/MinGW.cpp b/clang/lib/Driver/ToolChains/MinGW.cpp +index 503dbf3f0dea..264cc7ab06c8 100644 +--- a/clang/lib/Driver/ToolChains/MinGW.cpp ++++ b/clang/lib/Driver/ToolChains/MinGW.cpp +@@ -240,7 +240,11 @@ void tools::MinGW::Linker::ConstructJob(Compilation &C, const JobAction &JA, + + if (C.getDriver().IsFlangMode()) { + addFortranRuntimeLibraryPath(TC, Args, CmdArgs); ++#ifdef ENABLE_CLASSIC_FLANG ++ addFortranRuntimeLibs(TC, Args, CmdArgs); ++#else + addFortranRuntimeLibs(TC, CmdArgs); ++#endif + } + + // TODO: Add profile stuff here +diff --git a/clang/lib/Driver/Types.cpp b/clang/lib/Driver/Types.cpp +old mode 100644 +new mode 100755 +index 7d6308d757bc..a10c6e2c6c42 +--- a/clang/lib/Driver/Types.cpp ++++ b/clang/lib/Driver/Types.cpp +@@ -55,9 +55,11 @@ const char 
*types::getTypeName(ID Id) { + + types::ID types::getPreprocessedType(ID Id) { + ID PPT = getInfo(Id).PreprocessedType; ++#ifndef ENABLE_CLASSIC_FLANG + assert((getInfo(Id).Phases.contains(phases::Preprocess) != + (PPT == TY_INVALID)) && + "Unexpected Preprocess Type."); ++#endif + return PPT; + } + +@@ -134,6 +136,10 @@ bool types::isAcceptedByClang(ID Id) { + case TY_Asm: + case TY_C: case TY_PP_C: + case TY_CL: case TY_CLCXX: ++#ifdef ENABLE_CLASSIC_FLANG ++ case TY_F_FreeForm: case TY_PP_F_FreeForm: ++ case TY_F_FixedForm: case TY_PP_F_FixedForm: ++#endif + case TY_CUDA: case TY_PP_CUDA: + case TY_CUDA_DEVICE: + case TY_HIP: +@@ -164,12 +170,20 @@ bool types::isAcceptedByFlang(ID Id) { + default: + return false; + ++#ifdef ENABLE_CLASSIC_FLANG ++ case TY_F_FreeForm: ++ case TY_PP_F_FreeForm: ++ case TY_F_FixedForm: ++ case TY_PP_F_FixedForm: ++ return true; ++#else + case TY_Fortran: + case TY_PP_Fortran: + return true; + case TY_LLVM_IR: + case TY_LLVM_BC: + return true; ++#endif + } + } + +@@ -286,6 +300,22 @@ bool types::isHIP(ID Id) { + } + } + ++#ifdef ENABLE_CLASSIC_FLANG ++bool types::isFreeFormFortran(ID Id) { ++ if (!isAcceptedByFlang(Id)) ++ return false; ++ ++ return (Id == TY_F_FreeForm || Id == TY_PP_F_FreeForm); ++} ++ ++bool types::isFixedFormFortran(ID Id) { ++ if (!isAcceptedByFlang(Id)) ++ return false; ++ ++ return (Id == TY_F_FixedForm || Id == TY_PP_F_FixedForm); ++} ++#endif ++ + bool types::isHLSL(ID Id) { return Id == TY_HLSL; } + + bool types::isSrcFile(ID Id) { +@@ -296,8 +326,13 @@ types::ID types::lookupTypeForExtension(llvm::StringRef Ext) { + return llvm::StringSwitch<types::ID>(Ext) + .Case("c", TY_C) + .Case("C", TY_CXX) ++#ifdef ENABLE_CLASSIC_FLANG ++ .Case("F", TY_F_FixedForm) ++ .Case("f", TY_PP_F_FixedForm) ++#else + .Case("F", TY_Fortran) + .Case("f", TY_PP_Fortran) ++#endif + .Case("h", TY_CHeader) + .Case("H", TY_CXXHeader) + .Case("i", TY_PP_C) +@@ -331,6 +366,20 @@ types::ID types::lookupTypeForExtension(llvm::StringRef Ext) { + .Case("cui", TY_PP_CUDA) + .Case("cxx", TY_CXX) + .Case("CXX", TY_CXX) ++#ifdef ENABLE_CLASSIC_FLANG ++ .Case("for", TY_PP_F_FixedForm) ++ .Case("FOR", TY_PP_F_FixedForm) ++ .Case("fpp", TY_F_FixedForm) ++ .Case("FPP", TY_F_FixedForm) ++ .Case("f90", TY_PP_F_FreeForm) ++ .Case("f95", TY_PP_F_FreeForm) ++ .Case("f03", TY_PP_F_FreeForm) ++ .Case("f08", TY_PP_F_FreeForm) ++ .Case("F90", TY_F_FreeForm) ++ .Case("F95", TY_F_FreeForm) ++ .Case("F03", TY_F_FreeForm) ++ .Case("F08", TY_F_FreeForm) ++#else + .Case("F03", TY_Fortran) + .Case("f03", TY_PP_Fortran) + .Case("F08", TY_Fortran) +@@ -343,6 +392,7 @@ types::ID types::lookupTypeForExtension(llvm::StringRef Ext) { + .Case("FOR", TY_PP_Fortran) + .Case("fpp", TY_Fortran) + .Case("FPP", TY_Fortran) ++#endif + .Case("gch", TY_PCH) + .Case("hip", TY_HIP) + .Case("hipi", TY_PP_HIP) +diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp +index f8fae82fba12..7b01cb01a45e 100644 +--- a/clang/lib/Frontend/InitPreprocessor.cpp ++++ b/clang/lib/Frontend/InitPreprocessor.cpp +@@ -161,26 +161,6 @@ static void DefineFloatMacros(MacroBuilder &Builder, StringRef Prefix, + Builder.defineMacro(DefPrefix + "MIN__", Twine(Min)+Ext); + } + +- +-/// DefineTypeSize - Emit a macro to the predefines buffer that declares a macro +-/// named MacroName with the max value for a type with width 'TypeWidth' a +-/// signedness of 'isSigned' and with a value suffix of 'ValSuffix' (e.g. LL). 
+-static void DefineTypeSize(const Twine &MacroName, unsigned TypeWidth, +- StringRef ValSuffix, bool isSigned, +- MacroBuilder &Builder) { +- llvm::APInt MaxVal = isSigned ? llvm::APInt::getSignedMaxValue(TypeWidth) +- : llvm::APInt::getMaxValue(TypeWidth); +- Builder.defineMacro(MacroName, toString(MaxVal, 10, isSigned) + ValSuffix); +-} +- +-/// DefineTypeSize - An overloaded helper that uses TargetInfo to determine +-/// the width, suffix, and signedness of the given type +-static void DefineTypeSize(const Twine &MacroName, TargetInfo::IntType Ty, +- const TargetInfo &TI, MacroBuilder &Builder) { +- DefineTypeSize(MacroName, TI.getTypeWidth(Ty), TI.getTypeConstantSuffix(Ty), +- TI.isTypeSigned(Ty), Builder); +-} +- + static void DefineFmt(const Twine &Prefix, TargetInfo::IntType Ty, + const TargetInfo &TI, MacroBuilder &Builder) { + bool IsSigned = TI.isTypeSigned(Ty); +diff --git a/clang/test/CMakeLists.txt b/clang/test/CMakeLists.txt +index 31b494f39cce..b88694deb818 100644 +--- a/clang/test/CMakeLists.txt ++++ b/clang/test/CMakeLists.txt +@@ -10,6 +10,7 @@ llvm_canonicalize_cmake_booleans( + CLANG_PLUGIN_SUPPORT + CLANG_SPAWN_CC1 + ENABLE_BACKTRACES ++ LLVM_ENABLE_CLASSIC_FLANG + LLVM_ENABLE_ZLIB + LLVM_ENABLE_ZSTD + LLVM_ENABLE_PER_TARGET_RUNTIME_DIR +diff --git a/clang/test/CodeGen/libpgmath-logfun-aarch64.ll b/clang/test/CodeGen/libpgmath-logfun-aarch64.ll +new file mode 100644 +index 000000000000..141fed29ccd1 +--- /dev/null ++++ b/clang/test/CodeGen/libpgmath-logfun-aarch64.ll +@@ -0,0 +1,58 @@ ++; REQUIRES: aarch64-registered-target ++ ++; RUN: %clang -target aarch64-unknown-linux-gnu -Ofast -S %s -o - | FileCheck %s ++ ++target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" ++ ++define void @fun_(i64* nocapture %z) local_unnamed_addr #0 { ++L.entry: ++ %0 = bitcast i64* %z to i8* ++ %1 = bitcast i64* %z to float* ++ %2 = load float, float* %1, align 4 ++ %3 = fpext float %2 to double ++ %4 = fadd double %3, 5.000000e-01 ++ %5 = tail call double @__pd_log_1(double %4) #1 ++ %6 = fptrunc double %5 to float ++ %7 = tail call float @__ps_exp_1(float %6) #2 ++ store float %7, float* %1, align 4 ++ %8 = getelementptr i8, i8* %0, i64 4 ++ %9 = bitcast i8* %8 to float* ++ %10 = load float, float* %9, align 4 ++ %11 = fpext float %10 to double ++ %12 = fadd double %11, 5.000000e-01 ++ %13 = tail call double @__pd_log_1(double %12) #1 ++ %14 = fptrunc double %13 to float ++ %15 = tail call float @__ps_exp_1(float %14) #2 ++ store float %15, float* %9, align 4 ++ %16 = getelementptr i64, i64* %z, i64 1 ++ %17 = bitcast i64* %16 to float* ++ %18 = load float, float* %17, align 4 ++ %19 = fpext float %18 to double ++ %20 = fadd double %19, 5.000000e-01 ++ %21 = tail call double @__pd_log_1(double %20) #1 ++ %22 = fptrunc double %21 to float ++ %23 = tail call float @__ps_exp_1(float %22) #2 ++ store float %23, float* %17, align 4 ++ %24 = getelementptr i8, i8* %0, i64 12 ++ %25 = bitcast i8* %24 to float* ++ %26 = load float, float* %25, align 4 ++ %27 = fpext float %26 to double ++ %28 = fadd double %27, 5.000000e-01 ++ %29 = tail call double @__pd_log_1(double %28) #1 ++ %30 = fptrunc double %29 to float ++ %31 = tail call float @__ps_exp_1(float %30) #2 ++ store float %31, float* %25, align 4 ++ ret void ++ ++; CHECK-NOT: __pd_log_4 ++; CHECK: __pd_log_2 ++; CHECK: __pd_log_2 ++} ++ ++; Function Attrs: nounwind readnone willreturn ++declare float @__ps_exp_1(float) #0 ++ ++; Function Attrs: nounwind readnone willreturn ++declare double 
@__pd_log_1(double) #0 ++ ++attributes #0 = { nounwind readnone willreturn } +diff --git a/clang/test/CodeGen/libpgmath-logfun-x86_64.ll b/clang/test/CodeGen/libpgmath-logfun-x86_64.ll +new file mode 100644 +index 000000000000..3ce1d910947f +--- /dev/null ++++ b/clang/test/CodeGen/libpgmath-logfun-x86_64.ll +@@ -0,0 +1,57 @@ ++; REQUIRES: x86-registered-target ++ ++; RUN: %clang -target x86_64-unknown-linux-gnu -msse -Ofast -S %s -o - | FileCheck %s ++ ++target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" ++ ++define void @fun_(i64* nocapture %z) local_unnamed_addr #0 { ++L.entry: ++ %0 = bitcast i64* %z to i8* ++ %1 = bitcast i64* %z to float* ++ %2 = load float, float* %1, align 4 ++ %3 = fpext float %2 to double ++ %4 = fadd double %3, 5.000000e-01 ++ %5 = tail call double @__pd_log_1(double %4) #1 ++ %6 = fptrunc double %5 to float ++ %7 = tail call float @__ps_exp_1(float %6) #2 ++ store float %7, float* %1, align 4 ++ %8 = getelementptr i8, i8* %0, i64 4 ++ %9 = bitcast i8* %8 to float* ++ %10 = load float, float* %9, align 4 ++ %11 = fpext float %10 to double ++ %12 = fadd double %11, 5.000000e-01 ++ %13 = tail call double @__pd_log_1(double %12) #1 ++ %14 = fptrunc double %13 to float ++ %15 = tail call float @__ps_exp_1(float %14) #2 ++ store float %15, float* %9, align 4 ++ %16 = getelementptr i64, i64* %z, i64 1 ++ %17 = bitcast i64* %16 to float* ++ %18 = load float, float* %17, align 4 ++ %19 = fpext float %18 to double ++ %20 = fadd double %19, 5.000000e-01 ++ %21 = tail call double @__pd_log_1(double %20) #1 ++ %22 = fptrunc double %21 to float ++ %23 = tail call float @__ps_exp_1(float %22) #2 ++ store float %23, float* %17, align 4 ++ %24 = getelementptr i8, i8* %0, i64 12 ++ %25 = bitcast i8* %24 to float* ++ %26 = load float, float* %25, align 4 ++ %27 = fpext float %26 to double ++ %28 = fadd double %27, 5.000000e-01 ++ %29 = tail call double @__pd_log_1(double %28) #1 ++ %30 = fptrunc double %29 to float ++ %31 = tail call float @__ps_exp_1(float %30) #2 ++ store float %31, float* %25, align 4 ++ ret void ++ ++; CHECK-NOT: __pd_log_1 ++; CHECK: __pd_log_4 ++} ++ ++; Function Attrs: nounwind readnone willreturn ++declare float @__ps_exp_1(float) #0 ++ ++; Function Attrs: nounwind readnone willreturn ++declare double @__pd_log_1(double) #0 ++ ++attributes #0 = { nounwind readnone willreturn } +diff --git a/clang/test/Driver/autocomplete.c b/clang/test/Driver/autocomplete.c +index d6f57708b67e..8c4921c9dbd5 100644 +--- a/clang/test/Driver/autocomplete.c ++++ b/clang/test/Driver/autocomplete.c +@@ -85,7 +85,7 @@ + // FVECLIBALL-NEXT: libmvec + // FVECLIBALL-NEXT: MASSV + // FVECLIBALL-NEXT: none +-// FVECLIBALL-NEXT: SLEEF ++// FVECLIBALL: SLEEF + // FVECLIBALL-NEXT: SVML + // RUN: %clang --autocomplete=-fshow-overloads= | FileCheck %s -check-prefix=FSOVERALL + // FSOVERALL: all +diff --git a/clang/test/Driver/emit-flang-attrs.f90 b/clang/test/Driver/emit-flang-attrs.f90 +new file mode 100644 +index 000000000000..df23a8aa6e87 +--- /dev/null ++++ b/clang/test/Driver/emit-flang-attrs.f90 +@@ -0,0 +1,58 @@ ++! REQUIRES: aarch64-registered-target ++! REQUIRES: classic_flang ++! RUN: %flang -### -target aarch64-linux-gnu -march=armv8-a -c %s 2>&1 | FileCheck --check-prefix=CHECK-ATTRS-NEON %s ++! RUN: %flang -### -target aarch64-linux-gnu -march=armv8-a+sve -c %s 2>&1 | FileCheck --check-prefix=CHECK-ATTRS-SVE %s ++! 
RUN: %flang -### -target aarch64-linux-gnu -march=armv8-a+nosve -c %s 2>&1 | FileCheck --check-prefix=CHECK-ATTRS-NOSVE %s ++! RUN: %flang -### -target aarch64-linux-gnu -march=armv8-a+sve+nosve -c %s 2>&1 | FileCheck --check-prefix=CHECK-ATTRS-NOSVE %s ++! RUN: %flang -### -target aarch64-linux-gnu -march=armv8-a+sve2+nosve2 -c %s 2>&1 | FileCheck %s --check-prefix=CHECK-SVE2-REVERT ++! RUN: %flang -### -target aarch64-linux-gnu -march=armv8-a+sve2-aes -c %s 2>&1 | FileCheck %s --check-prefix=CHECK-SVE2-AES ++! RUN: %flang -### -target aarch64-linux-gnu -march=armv8-a+sve2-sm4 -c %s 2>&1 | FileCheck %s --check-prefix=CHECK-SVE2-SM4 ++! RUN: %flang -### -target aarch64-linux-gnu -march=armv8-a+sve2-sha3 -c %s 2>&1 | FileCheck %s --check-prefix=CHECK-SVE2-SHA3 ++! RUN: %flang -### -target aarch64-linux-gnu -march=armv8-a+sve2-bitperm+nosve2-bitperm -c %s 2>&1 | FileCheck %s --check-prefix=CHECK-SVE2-BITPERM-REVERT ++! RUN: %flang -### -target aarch64-linux-gnu -march=armv8-a+sve2 -c %s 2>&1 | FileCheck %s --check-prefix=CHECK-SVE2-IMPLY ++! RUN: %flang -### -target aarch64-linux-gnu -march=armv8-a+nosve+sve2 -c %s 2>&1 | FileCheck %s --check-prefix=CHECK-SVE2-CONFLICT-REV ++! RUN: %flang -### -target aarch64-linux-gnu -march=armv8-a+sve+sve2 -c %s 2>&1 | FileCheck %s --check-prefix=CHECK-SVE-SVE2 ++! RUN: %flang -### -target aarch64-linux-gnu -march=armv8-a+sve2-bitperm -c %s 2>&1 | FileCheck %s --check-prefix=CHECK-SVE2-BITPERM ++! RUN: %flang -### -target aarch64-linux-gnu -march=armv8-a+nosve+sve2-aes -c %s 2>&1 | FileCheck %s --check-prefix=CHECK-SVE-SUBFEATURE-CONFLICT-REV ++! RUN: %flang -### -target aarch64-linux-gnu -march=armv8-a+sve2-sm4+nosve2 -c %s 2>&1 | FileCheck %s --check-prefix=CHECK-SVE2-SUBFEATURE-CONFLICT ++! RUN: %flang -### -target aarch64-linux-gnu -march=armv8-a+sve2-bitperm+nosve2-aes -c %s 2>&1 | FileCheck %s --check-prefix=CHECK-SVE2-SUBFEATURE-MIX ++! RUN: %flang -### -target aarch64-linux-gnu -march=armv8-a+sve2-sm4+nosve2-sm4 -c %s 2>&1 | FileCheck %s --check-prefix=CHECK-SVE2-SM4-REVERT ++! RUN: %flang -### -target aarch64-linux-gnu -march=armv8-a+sve2-sha3+nosve2-sha3 %s 2>&1 | FileCheck %s --check-prefix=CHECK-SVE2-SHA3-REVERT ++! RUN: %flang -### -target aarch64-linux-gnu -march=armv8-a+sve2-aes+nosve2-aes %s 2>&1 | FileCheck %s --check-prefix=CHECK-SVE2-AES-REVERT ++ ++! CHECK-ATTRS-NEON: "{{.*}}flang2" ++! CHECK-ATTRS-NEON-SAME: "-target_features" "+neon,+v8a" ++! CHECK-ATTRS-SVE: "{{.*}}flang2" ++! CHECK-ATTRS-SVE-SAME: "-target_features" "+neon,+v8a,+sve" ++! CHECK-ATTRS-NOSVE: "{{.*}}flang2" ++! CHECK-ATTRS-NOSVE-SAME: "-target_features" "+neon,+v8a,-sve,-sve2,-sve2-bitperm,-sve2-sha3,-sve2-aes,-sve2-sm4" ++! CHECK-SVE2-REVERT: "{{.*}}flang2" ++! CHECK-SVE2-REVERT-SAME: "-target_features" "+neon,+v8a,+sve,-sve2,-sve2-bitperm,-sve2-sha3,-sve2-aes,-sve2-sm4" ++! CHECK-SVE2-AES: "{{.*}}flang2" ++! CHECK-SVE2-AES-SAME: "-target_features" "+neon,+v8a,+sve2-aes,+sve,+sve2" ++! CHECK-SVE2-SM4: "{{.*}}flang2" ++! CHECK-SVE2-SM4-SAME: "-target_features" "+neon,+v8a,+sve2-sm4,+sve,+sve2" ++! CHECK-SVE2-SHA3: "{{.*}}flang2" ++! CHECK-SVE2-SHA3-SAME: "-target_features" "+neon,+v8a,+sve2-sha3,+sve,+sve2" ++! CHECK-SVE2-BITPERM-REVERT: "{{.*}}flang2" ++! CHECK-SVE2-BITPERM-REVERT-SAME: "-target_features" "+neon,+v8a,+sve,+sve2,-sve2-bitperm" ++! CHECK-SVE2-IMPLY: "{{.*}}flang2" ++! CHECK-SVE2-IMPLY-SAME: "-target_features" "+neon,+v8a,+sve2,+sve" ++! CHECK-SVE2-CONFLICT-REV: "{{.*}}flang2" ++! 
CHECK-SVE2-CONFLICT-REV-SAME: "-target_features" "+neon,+v8a,-sve2-bitperm,-sve2-sha3,-sve2-aes,-sve2-sm4,+sve2,+sve" ++! CHECK-SVE-SVE2: "{{.*}}flang2" ++! CHECK-SVE-SVE2-SAME: "-target_features" "+neon,+v8a,+sve2,+sve" ++! CHECK-SVE2-BITPERM: "{{.*}}flang2" ++! CHECK-SVE2-BITPERM-SAME: "-target_features" "+neon,+v8a,+sve2-bitperm,+sve,+sve2" ++! CHECK-SVE-SUBFEATURE-CONFLICT-REV: "{{.*}}flang2" ++! CHECK-SVE-SUBFEATURE-CONFLICT-REV-SAME: "-target_features" "+neon,+v8a,-sve2-bitperm,-sve2-sha3,-sve2-sm4,+sve2-aes,+sve,+sve2" ++! CHECK-SVE2-SUBFEATURE-CONFLICT: "{{.*}}flang2" ++! CHECK-SVE2-SUBFEATURE-CONFLICT-SAME: "-target_features" "+neon,+v8a,+sve,-sve2,-sve2-bitperm,-sve2-sha3,-sve2-aes,-sve2-sm4" ++! CHECK-SVE2-SUBFEATURE-MIX: "{{.*}}flang2" ++! CHECK-SVE2-SUBFEATURE-MIX-SAME: "-target_features" "+neon,+v8a,+sve2-bitperm,+sve,+sve2,-sve2-aes" ++! CHECK-SVE2-SM4-REVERT: "{{.*}}flang2" ++! CHECK-SVE2-SM4-REVERT-SAME: "-target_features" "+neon,+v8a,+sve,+sve2,-sve2-sm4" ++! CHECK-SVE2-SHA3-REVERT: "{{.*}}flang2" ++! CHECK-SVE2-SHA3-REVERT-SAME: "-target_features" "+neon,+v8a,+sve,+sve2,-sve2-sha3" ++! CHECK-SVE2-AES-REVERT: "{{.*}}flang2" ++! CHECK-SVE2-AES-REVERT-SAME: "-target_features" "+neon,+v8a,+sve,+sve2,-sve2-aes" +diff --git a/clang/test/Driver/flang/Inputs/llvm-ir-input.ll b/clang/test/Driver/flang/Inputs/llvm-ir-input.ll +new file mode 100644 +index 000000000000..e69de29bb2d1 +diff --git a/clang/test/Driver/flang/classic-flang-emit-flang-llvm.f95 b/clang/test/Driver/flang/classic-flang-emit-flang-llvm.f95 +new file mode 100644 +index 000000000000..225207c85db7 +--- /dev/null ++++ b/clang/test/Driver/flang/classic-flang-emit-flang-llvm.f95 +@@ -0,0 +1,10 @@ ++! REQUIRES: classic_flang ++ ++! Check that the -emit-flang-llvm option dumps LLVM IR pre-optimisation ++ ++! RUN: %clang --driver-mode=flang -emit-flang-llvm -S -o %t.ll %s -### 2>&1 \ ++! RUN: | FileCheck %s ++! CHECK-NOT: argument unused during compilation: '-S' ++! CHECK: "{{.*}}flang1" ++! CHECK-NEXT: "{{.*}}flang2" ++! CHECK-NOT: "{{.*}}clang{{.*}}" "-cc1" +diff --git a/clang/test/Driver/flang/classic-flang-fp-contract.f95 b/clang/test/Driver/flang/classic-flang-fp-contract.f95 +new file mode 100644 +index 000000000000..b181065d1cac +--- /dev/null ++++ b/clang/test/Driver/flang/classic-flang-fp-contract.f95 +@@ -0,0 +1,27 @@ ++! REQUIRES: classic_flang ++ ++! RUN: %flang -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-FLANG2-FP-CONTRACT-ABSENCE ++! RUN: %flang -O1 -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-FLANG2-FP-CONTRACT ++! RUN: %flang -O2 -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-FLANG2-FP-CONTRACT ++! RUN: %flang -O3 -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-FLANG2-FP-CONTRACT ++! RUN: %flang -Ofast -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-FLANG2-FP-CONTRACT ++! RUN: %flang -ffp-contract=fast -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-FLANG2-FP-CONTRACT-ABSENCE ++! RUN: %flang -O1 -ffp-contract=fast -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-FLANG2-FP-CONTRACT ++! RUN: %flang -O2 -ffp-contract=fast -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-FLANG2-FP-CONTRACT ++! RUN: %flang -O3 -ffp-contract=fast -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-FLANG2-FP-CONTRACT ++! RUN: %flang -Ofast -ffp-contract=fast -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-FLANG2-FP-CONTRACT ++! RUN: %flang -ffp-contract=on -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-FLANG2-FP-CONTRACT-ABSENCE ++! 
RUN: %flang -O1 -ffp-contract=on -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-FLANG2-FP-CONTRACT ++! RUN: %flang -O2 -ffp-contract=on -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-FLANG2-FP-CONTRACT ++! RUN: %flang -O3 -ffp-contract=on -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-FLANG2-FP-CONTRACT ++! RUN: %flang -Ofast -ffp-contract=on -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-FLANG2-FP-CONTRACT ++! RUN: %flang -ffp-contract=off -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-FLANG2-FP-CONTRACT-ABSENCE ++! RUN: %flang -O1 -ffp-contract=off -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-FLANG2-FP-CONTRACT-ABSENCE ++! RUN: %flang -O2 -ffp-contract=off -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-FLANG2-FP-CONTRACT-ABSENCE ++! RUN: %flang -O3 -ffp-contract=off -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-FLANG2-FP-CONTRACT-ABSENCE ++! RUN: %flang -Ofast -ffp-contract=off -c %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-FLANG2-FP-CONTRACT-ABSENCE ++ ++! CHECK-FLANG2-FP-CONTRACT: "{{.*}}flang2" ++! CHECK-FLANG2-FP-CONTRACT-SAME: "-x" "172" "0x40000000" "-x" "179" "1" "-x" "216" "0x1000" ++! CHECK-FLANG2-FP-CONTRACT-ABSENCE: "{{.*}}flang2" ++! CHECK-FLANG2-FP-CONTRACT-ABSENCE-SAME: "-x" "171" "0x40000000" "-x" "178" "1" +diff --git a/clang/test/Driver/flang/classic-flang-must-preprocess.F b/clang/test/Driver/flang/classic-flang-must-preprocess.F +new file mode 100644 +index 000000000000..d52c1cf8d3c0 +--- /dev/null ++++ b/clang/test/Driver/flang/classic-flang-must-preprocess.F +@@ -0,0 +1,12 @@ ++! REQUIRES: classic_flang ++ ++! Check that the driver invokes flang1 correctly for fixed-form Fortran code ++! which requires preprocessing. ++ ++! RUN: %clang --driver-mode=flang -target x86_64-unknown-linux-gnu -c %s -### 2>&1 \ ++! RUN: | FileCheck %s ++! CHECK: "flang1" ++! CHECK-SAME: "-preprocess" ++! CHECK-SAME: "-nofreeform" ++! CHECK-NEXT: "flang2" ++! CHECK-NEXT: {{clang.* "-cc1"}} +diff --git a/clang/test/Driver/flang/classic-flang-must-preprocess.F95 b/clang/test/Driver/flang/classic-flang-must-preprocess.F95 +new file mode 100644 +index 000000000000..7d452a535784 +--- /dev/null ++++ b/clang/test/Driver/flang/classic-flang-must-preprocess.F95 +@@ -0,0 +1,12 @@ ++! REQUIRES: classic_flang ++ ++! Check that the driver invokes flang1 correctly for free-form Fortran code ++! which requires preprocessing. ++ ++! RUN: %clang --driver-mode=flang -target x86_64-unknown-linux-gnu -c %s -### 2>&1 \ ++! RUN: | FileCheck %s ++! CHECK: "flang1" ++! CHECK-SAME: "-preprocess" ++! CHECK-SAME: "-freeform" ++! CHECK-NEXT: "flang2" ++! CHECK-NEXT: {{clang.* "-cc1"}} +diff --git a/clang/test/Driver/flang/classic-flang-version.f b/clang/test/Driver/flang/classic-flang-version.f +new file mode 100644 +index 000000000000..c2082d3af8b7 +--- /dev/null ++++ b/clang/test/Driver/flang/classic-flang-version.f +@@ -0,0 +1,3 @@ ++! REQUIRES: classic-flang ++! RUN: %flang --version | FileCheck %s ++! 
CHECK: flang version {{.*}} ({{.*}}flang-compiler/classic-flang-llvm-project.git {{.*}}) +diff --git a/clang/test/Driver/flang/classic-flang-vscale-mbits.f95 b/clang/test/Driver/flang/classic-flang-vscale-mbits.f95 +new file mode 100644 +index 000000000000..f0ed43aa027a +--- /dev/null ++++ b/clang/test/Driver/flang/classic-flang-vscale-mbits.f95 +@@ -0,0 +1,28 @@ ++// RUN: %clang --driver-mode=flang -### -S --target=aarch64 -march=armv8-a+sve -msve-vector-bits=128 %s 2>&1 | FileCheck -check-prefix=CHECK-SVE-128 %s ++// RUN: %clang --driver-mode=flang -### -S --target=aarch64 -march=armv8-a+sve -msve-vector-bits=128+ %s 2>&1 | FileCheck -check-prefix=CHECK-SVE-128PLUS %s ++// RUN: %clang --driver-mode=flang -### -S --target=aarch64 -march=armv8-a+sve -msve-vector-bits=256 %s 2>&1 | FileCheck -check-prefix=CHECK-SVE-256 %s ++// RUN: %clang --driver-mode=flang -### -S --target=aarch64 -march=armv8-a+sve -msve-vector-bits=256+ %s 2>&1 | FileCheck -check-prefix=CHECK-SVE-256PLUS %s ++// RUN: %clang --driver-mode=flang -### -S --target=aarch64 -march=armv8-a+sve2 -msve-vector-bits=512 %s 2>&1 | FileCheck -check-prefix=CHECK-SVE2-512 %s ++// RUN: %clang --driver-mode=flang -### -S --target=aarch64 -march=armv8-a+sve2 -msve-vector-bits=512+ %s 2>&1 | FileCheck -check-prefix=CHECK-SVE2-512PLUS %s ++// RUN: %clang --driver-mode=flang -### -S --target=aarch64 -march=armv8-a+sve2-sha3 -msve-vector-bits=2048 %s 2>&1 | FileCheck -check-prefix=CHECK-SVE2SHA3-2048 %s ++// RUN: %clang --driver-mode=flang -### -S --target=aarch64 -march=armv8-a+sve2-sha3 -msve-vector-bits=2048+ %s 2>&1 | FileCheck -check-prefix=CHECK-SVE2SHA3-2048PLUS %s ++// RUN: %clang --driver-mode=flang -### -S --target=aarch64 -march=armv8-a+sve2 -msve-vector-bits=scalable %s 2>&1 | FileCheck -check-prefix=CHECK-SVE2-SCALABLE %s ++ ++// CHECK-SVE-128: "-target_features" "+neon,+v8a,+sve" ++// CHECK-SVE-128-DAG: "-vscale_range_min" "1" "-vscale_range_max" "1" ++// CHECK-SVE-128PLUS: "-target_features" "+neon,+v8a,+sve" ++// CHECK-SVE-128PLUS-DAG: "-vscale_range_min" "1" "-vscale_range_max" "0" ++// CHECK-SVE-256: "-target_features" "+neon,+v8a,+sve" ++// CHECK-SVE-256-DAG: "-vscale_range_min" "2" "-vscale_range_max" "2" ++// CHECK-SVE-256PLUS: "-target_features" "+neon,+v8a,+sve" ++// CHECK-SVE-256PLUS-DAG: "-vscale_range_min" "2" "-vscale_range_max" "0" ++// CHECK-SVE2-512: "-target_features" "+neon,+v8a,+sve2,+sve" ++// CHECK-SVE2-512-DAG: "-vscale_range_min" "4" "-vscale_range_max" "4" ++// CHECK-SVE2-512PLUS: "-target_features" "+neon,+v8a,+sve2,+sve" ++// CHECK-SVE2-512PLUS-DAG: "-vscale_range_min" "4" "-vscale_range_max" "0" ++// CHECK-SVE2SHA3-2048: "-target_features" "+neon,+v8a,+sve2-sha3,+sve,+sve2" ++// CHECK-SVE2SHA3-2048-DAG: "-vscale_range_min" "16" "-vscale_range_max" "16" ++// CHECK-SVE2SHA3-2048PLUS: "-target_features" "+neon,+v8a,+sve2-sha3,+sve,+sve2" ++// CHECK-SVE2SHA3-2048PLUS-DAG: "-vscale_range_min" "16" "-vscale_range_max" "0" ++// CHECK-SVE2-SCALABLE: "-target_features" "+neon,+v8a,+sve2,+sve" ++// CHECK-SVE2-SCALABLE-DAG: "-vscale_range_min" "1" "-vscale_range_max" "16" +diff --git a/clang/test/Driver/flang/classic-flang-vscale.f95 b/clang/test/Driver/flang/classic-flang-vscale.f95 +new file mode 100644 +index 000000000000..8110be594db5 +--- /dev/null ++++ b/clang/test/Driver/flang/classic-flang-vscale.f95 +@@ -0,0 +1,28 @@ ++// RUN: %clang --driver-mode=flang -### -S --target=aarch64 -march=armv8-a %s 2>&1 | FileCheck -check-prefix=CHECK-NEON %s ++// RUN: %clang --driver-mode=flang -### -S --target=aarch64 
-march=armv8-a+sve %s 2>&1 | FileCheck -check-prefix=CHECK-SVE %s ++// RUN: %clang --driver-mode=flang -### -S --target=aarch64 -march=armv8-a+sve2 %s 2>&1 | FileCheck -check-prefix=CHECK-SVE2 %s ++// RUN: %clang --driver-mode=flang -### -S --target=aarch64 -march=armv8-a+sve2-sha3 %s 2>&1 | FileCheck -check-prefix=CHECK-SVE2SHA3 %s ++// RUN: %clang --driver-mode=flang -### -S --target=aarch64 -march=armv8-a+sve+nosve %s 2>&1 | FileCheck -check-prefix=CHECK-SVE-NOSVE %s ++// RUN: %clang --driver-mode=flang -### -S --target=aarch64 -march=armv8-a+sve2+nosve2-sha3 %s 2>&1 | FileCheck -check-prefix=CHECK-SVE2-NOSVE2SHA3 %s ++// RUN: %clang --driver-mode=flang -### -S --target=aarch64 -march=armv8-a+sve2-sha3+nosve2 %s 2>&1 | FileCheck -check-prefix=CHECK-SVE2SHA3-NOSVE2 %s ++// RUN: %clang --driver-mode=flang -### -S --target=aarch64 -march=armv8-a+sve2-sha3+nosve %s 2>&1 | FileCheck -check-prefix=CHECK-SVE2SHA3-NOSVE %s ++ ++// CHECK-NEON: "-target_features" "+neon,+v8a" ++// CHECK-NEON-NOT: "-vscale_range_min" ++// CHECK-NEON-NOT: "-vscale_range_max" ++// CHECK-SVE: "-target_features" "+neon,+v8a,+sve" ++// CHECK-SVE-DAG: "-vscale_range_min" "1" "-vscale_range_max" "16" ++// CHECK-SVE2: "-target_features" "+neon,+v8a,+sve2,+sve" ++// CHECK-SVE2-DAG: "-vscale_range_min" "1" "-vscale_range_max" "16" ++// CHECK-SVE2SHA3: "-target_features" "+neon,+v8a,+sve2-sha3,+sve,+sve2" ++// CHECK-SVE2SHA3-DAG: "-vscale_range_min" "1" "-vscale_range_max" "16" ++// CHECK-SVE-NOSVE: "-target_features" "+neon,+v8a,-sve,-sve2,-sve2-bitperm,-sve2-sha3,-sve2-aes,-sve2-sm4" ++// CHECK-SVE-NOSVE-NOT: "-vscale_range_min" ++// CHECK-SVE-NOSVE-NOT: "-vscale_range_max" ++// CHECK-SVE2-NOSVE2SHA3: "-target_features" "+neon,+v8a,+sve2,+sve,-sve2-sha3" ++// CHECK-SVE2-NOSVE2SHA3-DAG: "-vscale_range_min" "1" "-vscale_range_max" "16" ++// CHECK-SVE2SHA3-NOSVE2: "-target_features" "+neon,+v8a,+sve,-sve2,-sve2-bitperm,-sve2-sha3,-sve2-aes,-sve2-sm4" ++// CHECK-SVE2SHA3-NOSVE2-DAG: "-vscale_range_min" "1" "-vscale_range_max" "16" ++// CHECK-SVE2SHA3-NOSVE: "-target_features" "+neon,+v8a,-sve,-sve2,-sve2-bitperm,-sve2-sha3,-sve2-aes,-sve2-sm4" ++// CHECK-SVE2SHA3-NOSVE-NOT: "-vscale_range_min" ++// CHECK-SVE2SHA3-NOSVE-NOT: "-vscale_range_max" +diff --git a/clang/test/Driver/flang/classic-flang.f b/clang/test/Driver/flang/classic-flang.f +new file mode 100644 +index 000000000000..98f74d9626d7 +--- /dev/null ++++ b/clang/test/Driver/flang/classic-flang.f +@@ -0,0 +1,26 @@ ++! REQUIRES: classic_flang ++ ++! Check that the driver invokes flang1 correctly for preprocessed fixed-form ++! Fortran code. ++ ++! RUN: %clang --driver-mode=flang -target x86_64-unknown-linux-gnu -c %s -### 2>&1 \ ++! RUN: | FileCheck %s ++! CHECK: "flang1" ++! CHECK-NOT: "-preprocess" ++! CHECK-SAME: "-nofreeform" ++! CHECK-NEXT: "flang2" ++! CHECK-NEXT: {{clang.* "-cc1"}} ++ ++! Check that the driver invokes flang1 correctly when preprocessing is ++! explicitly requested. ++ ++! RUN: %clang --driver-mode=flang -target x86_64-unknown-linux-gnu -E %s -### 2>&1 \ ++! RUN: | FileCheck --check-prefix=CHECK-PREPROCESS %s ++! CHECK-PREPROCESS: "flang1" ++! CHECK-PREPROCESS-SAME: "-preprocess" ++! CHECK-PREPROCESS-SAME: "-es" ++! CHECK-PREPROCESS-SAME: "-pp" ++! CHECK-PREPROCESS-NOT: "flang1" ++! CHECK-PREPROCESS-NOT: "flang2" ++! CHECK-PREPROCESS-NOT: {{clang.* "-cc1"}} ++! 
CHECK-PREPROCESS-NOT: {{clang.* "-cc1as"}} +diff --git a/clang/test/Driver/flang/classic-flang.f95 b/clang/test/Driver/flang/classic-flang.f95 +new file mode 100644 +index 000000000000..3350bd45550f +--- /dev/null ++++ b/clang/test/Driver/flang/classic-flang.f95 +@@ -0,0 +1,120 @@ ++! REQUIRES: classic_flang ++ ++! Check that the driver invokes flang1 correctly for preprocessed free-form ++! Fortran code. Also check that the backend is invoked correctly. ++ ++! RUN: %clang --driver-mode=flang -target x86_64-unknown-linux-gnu -c %s -### 2>&1 \ ++! RUN: | FileCheck --check-prefix=CHECK-OBJECT %s ++! CHECK-OBJECT: "flang1" ++! CHECK-OBJECT-NOT: "-preprocess" ++! CHECK-OBJECT-SAME: "-freeform" ++! CHECK-OBJECT-NEXT: "flang2" ++! CHECK-OBJECT-SAME: "-asm" LLFILE:.*.ll ++! CHECK-OBJECT-NEXT: {{clang.* "-cc1"}} ++! CHECK-OBJECT-SAME: "-o" "classic-flang.o" ++! CHECK-OBJECT-SAME: "-x" "ir" ++! CHECK-OBJECT-SAME: LLFILE ++ ++! Check that the driver invokes flang1 correctly when preprocessing is ++! explicitly requested. ++ ++! RUN: %clang --driver-mode=flang -target x86_64-unknown-linux-gnu -E %s -### 2>&1 \ ++! RUN: | FileCheck --check-prefix=CHECK-PREPROCESS %s ++! CHECK-PREPROCESS: "flang1" ++! CHECK-PREPROCESS-SAME: "-preprocess" ++! CHECK-PREPROCESS-SAME: "-es" ++! CHECK-PREPROCESS-SAME: "-pp" ++! CHECK-PREPROCESS-NOT: "flang1" ++! CHECK-PREPROCESS-NOT: "flang2" ++! CHECK-PREPROCESS-NOT: {{clang.* "-cc1"}} ++! CHECK-PREPROCESS-NOT: {{clang.* "-cc1as"}} ++ ++! Check that the backend job (clang -cc1) is not combined into the compile job ++! (flang2) even if -integrated-as is specified. ++ ++! RUN: %clang --driver-mode=flang -target x86_64-unknown-linux-gnu -integrated-as -S %s -### 2>&1 \ ++! RUN: | FileCheck --check-prefix=CHECK-ASM %s ++! CHECK-ASM: "flang1" ++! CHECK-ASM-NEXT: "flang2" ++! CHECK-ASM-SAME: "-asm" LLFILE:.*.ll ++! CHECK-ASM-NEXT: {{clang.* "-cc1"}} ++! CHECK-ASM-SAME: "-o" "classic-flang.s" ++! CHECK-ASM-SAME: "-x" "ir" ++! CHECK-ASM-SAME: LLFILE ++ ++! Check that the linker job is given the correct libraries and library paths. ++ ++! RUN: %flang -target x86_64-linux-gnu -ccc-install-dir %S/../Inputs/basic_linux_tree/usr/bin -mp \ ++! RUN: %s -lfoo -### 2>&1 | FileCheck --check-prefixes=CHECK-LD,CHECK-MAIN,CHECK-DYNAMIC-FLANG,CHECK-DYNAMIC-OMP %s ++! RUN: %flang -target x86_64-linux-gnu -ccc-install-dir %S/../Inputs/basic_linux_tree/usr/bin -mp -nomp \ ++! RUN: %s -lfoo -### 2>&1 | FileCheck --check-prefixes=CHECK-LD,CHECK-MAIN,CHECK-DYNAMIC-FLANG,CHECK-NO-OMP %s ++! RUN: %flang -target x86_64-linux-gnu -ccc-install-dir %S/../Inputs/basic_linux_tree/usr/bin -fopenmp \ ++! RUN: %s -lfoo -### 2>&1 | FileCheck --check-prefixes=CHECK-LD,CHECK-MAIN,CHECK-DYNAMIC-FLANG,CHECK-DYNAMIC-OMP %s ++! RUN: %flang -target x86_64-linux-gnu -ccc-install-dir %S/../Inputs/basic_linux_tree/usr/bin -fopenmp -fno-openmp \ ++! RUN: %s -lfoo -### 2>&1 | FileCheck --check-prefixes=CHECK-LD,CHECK-MAIN,CHECK-DYNAMIC-FLANG,CHECK-NO-OMP %s ++! RUN: %flang -target x86_64-linux-gnu -ccc-install-dir %S/../Inputs/basic_linux_tree/usr/bin -fopenmp -static-openmp \ ++! RUN: %s -lfoo -### 2>&1 | FileCheck --check-prefixes=CHECK-LD,CHECK-MAIN,CHECK-DYNAMIC-FLANG,CHECK-STATIC-OMP %s ++! RUN: %flang -target x86_64-linux-gnu -ccc-install-dir %S/../Inputs/basic_linux_tree/usr/bin -fopenmp -static-flang-libs \ ++! RUN: %s -lfoo -### 2>&1 | FileCheck --check-prefixes=CHECK-LD,CHECK-MAIN,CHECK-STATIC-FLANG,CHECK-DYNAMIC-OMP %s ++! 
RUN: %flang -target x86_64-linux-gnu -ccc-install-dir %S/../Inputs/basic_linux_tree/usr/bin -static-flang-libs \ ++! RUN: %s -lfoo -### 2>&1 | FileCheck --check-prefixes=CHECK-LD,CHECK-MAIN,CHECK-STATIC-FLANG,CHECK-NO-OMP %s ++! RUN: %flang -target x86_64-linux-gnu -ccc-install-dir %S/../Inputs/basic_linux_tree/usr/bin -Mnomain \ ++! RUN: %s -lfoo -### 2>&1 | FileCheck --check-prefixes=CHECK-LD,CHECK-NOMAIN,CHECK-DYNAMIC-FLANG %s ++! RUN: %flang -target x86_64-linux-gnu -ccc-install-dir %S/../Inputs/basic_linux_tree/usr/bin -fno-fortran-main \ ++! RUN: %s -lfoo -### 2>&1 | FileCheck --check-prefixes=CHECK-LD,CHECK-NOMAIN,CHECK-DYNAMIC-FLANG %s ++! CHECK-LD: "{{.*}}ld{{(.exe)?}}" ++! CHECK-LD-NOT: "-static" ++! CHECK-LD-SAME: "{{^"*}}classic-flang-{{^ *}}.o" ++! CHECK-MAIN-SAME: "-lflangmain" ++! CHECK-NOMAIN-NOT: "-lflangmain" ++! CHECK-LD-SAME: "-lfoo" "-L{{^ */\\+}}basic_linux_tree{{/\\+}}usr{{/\\+}}lib" ++! CHECK-DYNAMIC-FLANG-NOT: "-Bstatic" ++! CHECK-DYNAMIC-FLANG: "-lflang" "-lflangrti" "-lpgmath" "-lpthread" "-lrt" "-lm" ++! CHECK-DYNAMIC-FLANG-NOT: "-Bdynamic" ++! CHECK-STATIC-FLANG: "-Bstatic" "-lflang" "-lflangrti" "-lpgmath" "-Bdynamic" "-lpthread" "-lrt" "-lm" ++! CHECK-DYNAMIC-OMP-NOT: "-Bstatic" ++! CHECK-DYNAMIC-OMP: "-lomp" "-L{{^ */\\+}}basic_linux_tree{{/\\+}}usr{{/\\+}}lib" ++! CHECK-DYNAMIC-OMP-NOT: "-Bdynamic" ++! CHECK-STATIC-OMP: "-Bstatic" "-lomp" "-Bdynamic" "-L{{^ */\\+}}basic_linux_tree{{/\\+}}usr{{/\\+}}lib" ++! CHECK-NO-OMP-NOT: "-lomp" ++ ++! RUN: %flang -target x86_64-linux-gnu -ccc-install-dir %S/../Inputs/basic_linux_tree/usr/bin -static -static-flang-libs \ ++! RUN: %s -lfoo -### 2>&1 | FileCheck --check-prefixes=CHECK-LD-STATIC,CHECK-NO-OMP %s ++! RUN: %flang -target x86_64-linux-gnu -ccc-install-dir %S/../Inputs/basic_linux_tree/usr/bin -static -fopenmp \ ++! RUN: %s -lfoo -### 2>&1 | FileCheck --check-prefixes=CHECK-LD-STATIC,CHECK-STATIC-BOTH %s ++! RUN: %flang -target x86_64-linux-gnu -ccc-install-dir %S/../Inputs/basic_linux_tree/usr/bin -static -fopenmp -static-openmp \ ++! RUN: %s -lfoo -### 2>&1 | FileCheck --check-prefixes=CHECK-LD-STATIC,CHECK-STATIC-BOTH %s ++! CHECK-LD-STATIC: "{{.*}}ld{{(.exe)?}}" ++! CHECK-LD-STATIC: "-static" "-o" "a.out" ++! CHECK-LD-STATIC: "{{^"*}}classic-flang-{{^ *}}.o" "-lflangmain" "-lfoo" "-L{{^ */\\+}}basic_linux_tree{{/\\+}}usr{{/\\+}}lib" ++! CHECK-LD-STATIC-NOT: "-Bstatic" ++! CHECK-LD-STATIC: "-lflang" "-lflangrti" "-lpgmath" "-lpthread" "-lrt" "-lm" ++! CHECK-LD-STATIC-NOT: "-Bdynamic" ++! CHECK-STATIC-BOTH-NOT: "-Bstatic" ++! CHECK-STATIC-BOTH: "-lomp" ++! CHECK-STATIC-BOTH-NOT: "-Bdynamic" ++ ++! RUN: %flang -target x86_64-linux-gnu -ccc-install-dir %S/../Inputs/basic_linux_tree/usr/bin -no-flang-libs \ ++! RUN: %s -lfoo -### 2>&1 | FileCheck --check-prefixes=CHECK-NOFLANGLIBS %s ++! CHECK-NOFLANGLIBS: "{{.*}}ld{{(.exe)?}}" ++! CHECK-NOFLANGLIBS-SAME: "{{^"*}}classic-flang-{{^ *}}.o" ++! CHECK-NOFLANGLIBS-NOT: "-lflangmain" ++! CHECK-NOFLANGLIBS-SAME: "-lfoo" "-L{{^ */\\+}}basic_linux_tree{{/\\+}}usr{{/\\+}}lib" ++! CHECK-NOFLANGLIBS-NOT: "-lflang" "-lflangrti" "-lpgmath" ++! CHECK-NOFLANGLIBS: "-lm" "-lgcc" ++! CHECK-NOFLANGLIBS: "-lgcc_s" ++! CHECK-NOFLANGLIBS: "-lc" ++ ++! In Flang mode, we always link with libm, even with -nostdlib. ++! RUN: %flang -target x86_64-linux-gnu -ccc-install-dir %S/../Inputs/basic_linux_tree/usr/bin -nostdlib \ ++! RUN: %s -lfoo -### 2>&1 | FileCheck --check-prefixes=CHECK-NOSTDLIB %s ++! CHECK-NOSTDLIB: "{{.*}}ld{{(.exe)?}}" ++! 
CHECK-NOSTDLIB-SAME: "{{^"*}}classic-flang-{{^ *}}.o" ++! CHECK-NOSTDLIB-NOT: "-lflangmain" ++! CHECK-NOSTDLIB-SAME: "-lfoo" "-L{{^ */\\+}}basic_linux_tree{{/\\+}}usr{{/\\+}}lib" ++! CHECK-NOSTDLIB-NOT: "-lflang" "-lflangrti" "-lpgmath" ++! CHECK-NOSTDLIB-NOT: "-lpthread" "-lrt" ++! CHECK-NOSTDLIB: "-lm" ++! CHECK-NOSTDLIB-NOT: "-lgcc" ++! CHECK-NOSTDLIB-NOT: "-lgcc_s" ++! CHECK-NOSTDLIB-NOT: "-lc" +diff --git a/clang/test/Driver/flang/flang.f90 b/clang/test/Driver/flang/flang.f90 +index 5d8edf6308b0..8caa45f6342c 100644 +--- a/clang/test/Driver/flang/flang.f90 ++++ b/clang/test/Driver/flang/flang.f90 +@@ -1,5 +1,7 @@ + ! Check that flang -fc1 is invoked when in --driver-mode=flang. + ++! UNSUPPORTED: classic_flang ++ + ! This is a copy of flang_ucase.F90 because the driver has logic in it which + ! differentiates between F90 and f90 files. Flang will not treat these files + ! differently. +diff --git a/clang/test/Driver/flang/flang_ucase.F90 b/clang/test/Driver/flang/flang_ucase.F90 +index 50305ee337e1..21f7c469144b 100644 +--- a/clang/test/Driver/flang/flang_ucase.F90 ++++ b/clang/test/Driver/flang/flang_ucase.F90 +@@ -1,5 +1,7 @@ + ! Check that flang -fc1 is invoked when in --driver-mode=flang. + ++! UNSUPPORTED: classic_flang ++ + ! This is a copy of flang.f90 because the driver has logic in it which + ! differentiates between F90 and f90 files. Flang will not treat these files + ! differently. +diff --git a/clang/test/Driver/flang/llvm-ir-input.f b/clang/test/Driver/flang/llvm-ir-input.f +new file mode 100644 +index 000000000000..c34bf28328cb +--- /dev/null ++++ b/clang/test/Driver/flang/llvm-ir-input.f +@@ -0,0 +1,7 @@ ++! Check that LLVM IR input is passed to clang instead of flang1. ++ ++! REQUIRES: classic_flang ++! RUN: %clang --driver-mode=flang -S %S/Inputs/llvm-ir-input.ll -### 2>&1 | FileCheck %s ++ ++! CHECK-NOT: flang1 ++! CHECK: "{{.*}}clang{{.*}}" "-cc1" +diff --git a/clang/test/Driver/flang/multiple-inputs-mixed.f90 b/clang/test/Driver/flang/multiple-inputs-mixed.f90 +index 2395dbecf1fe..8f3918f7cb21 100644 +--- a/clang/test/Driver/flang/multiple-inputs-mixed.f90 ++++ b/clang/test/Driver/flang/multiple-inputs-mixed.f90 +@@ -1,5 +1,7 @@ + ! Check that flang can handle mixed C and fortran inputs. + ++! UNSUPPORTED: classic_flang ++ + ! RUN: %clang --driver-mode=flang -### -fsyntax-only %S/Inputs/one.f90 %S/Inputs/other.c 2>&1 | FileCheck --check-prefixes=CHECK-SYNTAX-ONLY %s + ! CHECK-SYNTAX-ONLY-LABEL: "{{^"*}}flang-new{{^"/*}}" "-fc1" + ! CHECK-SYNTAX-ONLY: "{{^"*}}/Inputs/one.f90" +diff --git a/clang/test/Driver/flang/multiple-inputs.f90 b/clang/test/Driver/flang/multiple-inputs.f90 +index ada999e927a6..4068ac3a0966 100644 +--- a/clang/test/Driver/flang/multiple-inputs.f90 ++++ b/clang/test/Driver/flang/multiple-inputs.f90 +@@ -1,5 +1,7 @@ + ! Check that flang driver can handle multiple inputs at once. + ++! UNSUPPORTED: classic_flang ++ + ! RUN: %clang --driver-mode=flang -### -fsyntax-only %S/Inputs/one.f90 %S/Inputs/two.f90 2>&1 | FileCheck --check-prefixes=CHECK-SYNTAX-ONLY %s + ! CHECK-SYNTAX-ONLY-LABEL: "{{^"*}}flang-new{{^"/*}}" "-fc1" + ! CHECK-SYNTAX-ONLY: "{{^"*}}/Inputs/one.f90" +diff --git a/clang/test/Driver/flang/reassoc.f90 b/clang/test/Driver/flang/reassoc.f90 +new file mode 100644 +index 000000000000..fe42087f7070 +--- /dev/null ++++ b/clang/test/Driver/flang/reassoc.f90 +@@ -0,0 +1,59 @@ ++! REQUIRES: classic_flang ++ ++! Tests for flags which generate nsw, reassoc attributes ++ ++! 
RUN: %flang -Kieee %s -### 2>&1 | FileCheck --check-prefixes=IEEE,NO_FAST,NO_RELAXED,NO_REASSOC,NO_NSZ %s ++! RUN: %flang -Knoieee %s -### 2>&1 | FileCheck --check-prefixes=NO_IEEE,NO_FAST,NO_RELAXED,NO_REASSOC,NO_NSZ %s ++! RUN: %flang -ffast-math %s -### 2>&1 | FileCheck --check-prefixes=NO_IEEE,FAST,NO_RELAXED,NO_REASSOC,NO_NSZ %s ++! RUN: %flang -fno-fast-math %s -### 2>&1 | FileCheck --check-prefixes=NO_FAST,NO_RELAXED,NO_REASSOC,NO_NSZ %s ++! RUN: %flang -frelaxed-math %s -### 2>&1 | FileCheck --check-prefixes=NO_IEEE,NO_FAST,RELAXED,NO_REASSOC,NO_NSZ %s ++! RUN: %flang -fassociative-math %s -### 2>&1 | FileCheck --check-prefixes=NO_IEEE,NO_FAST,NO_RELAXED,REASSOC_NSZ %s ++! RUN: %flang -fno-associative-math %s -### 2>&1 | FileCheck --check-prefixes=NO_FAST,NO_RELAXED,NO_REASSOC,NO_NSZ %s ++! RUN: %flang -fsigned-zeros %s -### 2>&1 | FileCheck --check-prefixes=NO_FAST,NO_RELAXED,NO_REASSOC,NO_NSZ %s ++! RUN: %flang -fno-signed-zeros %s -### 2>&1 | FileCheck --check-prefixes=NO_IEEE,NO_FAST,NO_RELAXED,NO_REASSOC,NSZ %s ++ ++! RUN: %flang -fno-associative-math -fno-signed-zeros %s -### 2>&1 | FileCheck --check-prefixes=NO_IEEE,NO_FAST,NO_RELAXED,NO_REASSOC,NSZ %s ++! RUN: %flang -fno-associative-math -fsigned-zeros %s -### 2>&1 | FileCheck --check-prefixes=NO_FAST,NO_RELAXED,NO_REASSOC,NO_NSZ %s ++! RUN: %flang -fassociative-math -fno-signed-zeros %s -### 2>&1 | FileCheck --check-prefixes=NO_IEEE,NO_FAST,NO_RELAXED,REASSOC_NSZ %s ++! RUN: %flang -fassociative-math -fsigned-zeros %s -### 2>&1 | FileCheck --check-prefixes=NO_IEEE,NO_FAST,NO_RELAXED,NO_REASSOC,NO_NSZ %s ++ ++! RUN: %flang -Kieee -fassociative-math %s -### 2>&1 | FileCheck --check-prefixes=NO_IEEE,NO_FAST,NO_RELAXED,REASSOC_NSZ %s ++! RUN: %flang -Kieee -fno-associative-math %s -### 2>&1 | FileCheck --check-prefixes=IEEE,NO_FAST,NO_RELAXED,NO_REASSOC,NO_NSZ %s ++! RUN: %flang -Kieee -fsigned-zeros %s -### 2>&1 | FileCheck --check-prefixes=IEEE,NO_FAST,NO_RELAXED,NO_REASSOC,NO_NSZ %s ++! RUN: %flang -Kieee -fno-signed-zeros %s -### 2>&1 | FileCheck --check-prefixes=NO_IEEE,NO_FAST,NO_RELAXED,NO_REASSOC,NSZ %s ++! RUN: %flang -ffast-math -fassociative-math %s -### 2>&1 | FileCheck --check-prefixes=NO_IEEE,NO_RELAXED,REASSOC_NSZ %s ++! RUN: %flang -ffast-math -fno-associative-math %s -### 2>&1 | FileCheck --check-prefixes=NO_IEEE,FAST,NO_RELAXED,NO_REASSOC,NO_NSZ %s ++! RUN: %flang -ffast-math -fsigned-zeros %s -### 2>&1 | FileCheck --check-prefixes=NO_IEEE,FAST,NO_RELAXED,NO_REASSOC,NO_NSZ %s ++! RUN: %flang -ffast-math -fno-signed-zeros %s -### 2>&1 | FileCheck --check-prefixes=NO_IEEE,NO_RELAXED,NO_REASSOC,NSZ %s ++! RUN: %flang -frelaxed-math -fassociative-math %s -### 2>&1 | FileCheck --check-prefixes=NO_IEEE,NO_FAST,REASSOC_NSZ %s ++! RUN: %flang -frelaxed-math -fno-associative-math %s -### 2>&1 | FileCheck --check-prefixes=NO_IEEE,NO_FAST,RELAXED,NO_REASSOC,NO_NSZ %s ++! RUN: %flang -frelaxed-math -fsigned-zeros %s -### 2>&1 | FileCheck --check-prefixes=NO_IEEE,NO_FAST,RELAXED,NO_REASSOC,NO_NSZ %s ++! RUN: %flang -frelaxed-math -fno-signed-zeros %s -### 2>&1 | FileCheck --check-prefixes=NO_IEEE,NO_FAST,NO_REASSOC,NSZ %s ++ ++! RUN: %flang -fassociative-math -Kieee %s -### 2>&1 | FileCheck --check-prefixes=IEEE,NO_FAST,NO_RELAXED,NO_REASSOC,NO_NSZ %s ++! RUN: %flang -fno-associative-math -Kieee %s -### 2>&1 | FileCheck --check-prefixes=IEEE,NO_FAST,NO_RELAXED,NO_REASSOC,NO_NSZ %s ++! RUN: %flang -fsigned-zeros -Kieee %s -### 2>&1 | FileCheck --check-prefixes=IEEE,NO_FAST,NO_RELAXED,NO_REASSOC,NO_NSZ %s ++! 
RUN: %flang -fno-signed-zeros -Kieee %s -### 2>&1 | FileCheck --check-prefixes=IEEE,NO_FAST,NO_RELAXED,NO_REASSOC,NO_NSZ %s ++! RUN: %flang -fassociative-math -ffast-math %s -### 2>&1 | FileCheck --check-prefixes=NO_IEEE,NO_RELAXED,REASSOC_NSZ %s ++! RUN: %flang -fno-associative-math -ffast-math %s -### 2>&1 | FileCheck --check-prefixes=NO_IEEE,FAST,NO_RELAXED,NO_REASSOC,NO_NSZ %s ++! RUN: %flang -fsigned-zeros -ffast-math %s -### 2>&1 | FileCheck --check-prefixes=NO_IEEE,FAST,NO_RELAXED,NO_REASSOC,NO_NSZ %s ++! RUN: %flang -fno-signed-zeros -ffast-math %s -### 2>&1 | FileCheck --check-prefixes=NO_IEEE,NO_RELAXED,NO_REASSOC,NSZ %s ++! RUN: %flang -fassociative-math -frelaxed-math %s -### 2>&1 | FileCheck --check-prefixes=NO_IEEE,NO_FAST,REASSOC_NSZ %s ++! RUN: %flang -fno-associative-math -frelaxed-math %s -### 2>&1 | FileCheck --check-prefixes=NO_IEEE,NO_FAST,RELAXED,NO_REASSOC,NO_NSZ %s ++! RUN: %flang -fsigned-zeros -frelaxed-math %s -### 2>&1 | FileCheck --check-prefixes=NO_IEEE,NO_FAST,RELAXED,NO_REASSOC,NO_NSZ %s ++! RUN: %flang -fno-signed-zeros -frelaxed-math %s -### 2>&1 | FileCheck --check-prefixes=NO_IEEE,NO_FAST,NO_REASSOC,NSZ %s ++ ++! IEEE: {{.*}}flang2{{.*}} "-ieee" "1" ++! NO_IEEE-NOT: {{.*}}flang2{{.*}} "-ieee" "1" ++ ++! FAST: {{.*}}flang2{{.*}} "-x" "216" "1" ++! NO_FAST-NOT: {{.*}}flang2{{.*}} "-x" "216" "1" ++ ++! RELAXED: {{.*}}flang2{{.*}} "-x" "15" "0x400" ++! NO_RELAXED-NOT: {{.*}}flang2{{.*}} "-x" "15" "0x400" ++ ++! REASSOC_NSZ: {{.*}}flang2{{.*}} "-x" "216" "0x8" "-x" "216" "0x10" ++! NO_REASSOC-NOT: {{.*}}flang2{{.*}} "-x" "216" "0x10" ++ ++! NSZ: {{.*}}flang2{{.*}} "-x" "216" "0x8" ++! NO_NSZ-NOT: {{.*}}flang2{{.*}} "-x" "216" "0x8" +diff --git a/clang/test/Driver/fortran-phases.f90 b/clang/test/Driver/fortran-phases.f90 +new file mode 100644 +index 000000000000..57d5f2c92b78 +--- /dev/null ++++ b/clang/test/Driver/fortran-phases.f90 +@@ -0,0 +1,119 @@ ++! Test to see that the correct phases are run for the commandline input ++ ++! REQUIRES: classic_flang ++ ++! RUN: %flang -ccc-print-phases 2>&1 %s | FileCheck %s --check-prefix=LINK-NOPP ++! RUN: %flang -ccc-print-phases -c 2>&1 %s | FileCheck %s --check-prefix=CONLY-NOPP ++! RUN: %flang -ccc-print-phases -S 2>&1 %s | FileCheck %s --check-prefix=AONLY-NOPP ++! RUN: %flang -ccc-print-phases -c -emit-llvm 2>&1 %s | FileCheck %s --check-prefix=LLONLY-NOPP ++! RUN: %flang -ccc-print-phases -S -emit-llvm 2>&1 %s | FileCheck %s --check-prefix=LLONLY-NOPP ++! RUN: %flang -ccc-print-phases -emit-flang-llvm 2>&1 %s | FileCheck %s --check-prefix=FLLONLY-NOPP ++! RUN: %flang -ccc-print-phases -fsyntax-only 2>&1 %s | FileCheck %s --check-prefix=SONLY-NOPP ++! RUN: %flang -ccc-print-phases -E 2>&1 %s | FileCheck %s --check-prefix=PPONLY-NOPP ++ ++! RUN: %flang -ccc-print-phases 2>&1 -x f95-cpp-input %s | FileCheck %s --check-prefix=LINK ++! RUN: %flang -ccc-print-phases 2>&1 -x f95-cpp-input %s | FileCheck %s --check-prefix=LINK ++! RUN: %flang -ccc-print-phases -c 2>&1 -x f95-cpp-input %s | FileCheck %s --check-prefix=CONLY ++! RUN: %flang -ccc-print-phases -S 2>&1 -x f95-cpp-input %s | FileCheck %s --check-prefix=AONLY ++! RUN: %flang -ccc-print-phases -c -emit-llvm 2>&1 -x f95-cpp-input %s | FileCheck %s --check-prefix=LLONLY ++! RUN: %flang -ccc-print-phases -S -emit-llvm 2>&1 -x f95-cpp-input %s | FileCheck %s --check-prefix=LLONLY ++! RUN: %flang -ccc-print-phases -emit-flang-llvm 2>&1 -x f95-cpp-input %s | FileCheck %s --check-prefix=FLLONLY ++! 
RUN: %flang -ccc-print-phases -fsyntax-only 2>&1 -x f95-cpp-input %s | FileCheck %s --check-prefix=SONLY ++! RUN: %flang -ccc-print-phases -E 2>&1 -x f95-cpp-input %s | FileCheck %s --check-prefix=PPONLY ++ ++! LINK-NOPP: 0: input, {{.*}}, f95 ++! LINK-NOPP: 1: compiler, {0}, ir ++! LINK-NOPP: 2: backend, {1}, assembler ++! LINK-NOPP: 3: assembler, {2}, object ++! LINK-NOPP: 4: linker, {3}, image ++ ++! CONLY-NOPP: 0: input, {{.*}}, f95 ++! CONLY-NOPP: 1: compiler, {0}, ir ++! CONLY-NOPP: 2: backend, {1}, assembler ++! CONLY-NOPP: 3: assembler, {2}, object ++! CONLY-NOPP-NOT: {{.*}}: linker, {{{.*}}}, image ++ ++! AONLY-NOPP: 0: input, {{.*}}, f95 ++! AONLY-NOPP: 1: compiler, {0}, ir ++! AONLY-NOPP: 2: backend, {1}, assembler ++! AONLY-NOPP-NOT: {{.*}}: assembler, {{{.*}}}, object ++! AONLY-NOPP-NOT: {{.*}}: linker, {{{.*}}}, image ++ ++! LLONLY-NOPP: 0: input, {{.*}}, f95 ++! LLONLY-NOPP: 1: compiler, {0}, ir ++! LLONLY-NOPP-NOT: {{.*}}: backend, {{{.*}}}, assembler ++! LLONLY-NOPP-NOT: {{.*}}: assembler, {{{.*}}}, object ++! LLONLY-NOPP-NOT: {{.*}}: linker, {{{.*}}}, image ++ ++! FLLONLY-NOPP: 0: input, {{.*}}, f95 ++! FLLONLY-NOPP: 1: compiler, {0}, ir ++! FLLONLY-NOPP-NOT: {{.*}}: backend, {{{.*}}}, assembler ++! FLLONLY-NOPP-NOT: {{.*}}: assembler, {{{.*}}}, object ++! FLLONLY-NOPP-NOT: {{.*}}: linker, {{{.*}}}, image ++ ++! SONLY-NOPP: 0: input, {{.*}}, f95 ++! SONLY-NOPP-NOT: {{.*}}: compiler, {{{.*}}}, ir ++! SONLY-NOPP-NOT: {{.*}}: backend, {{{.*}}}, assembler ++! SONLY-NOPP-NOT: {{.*}}: assembler, {{{.*}}}, object ++! SONLY-NOPP-NOT: {{.*}}: linker, {{{.*}}}, image ++ ++! flang always preprocesses with -E regardless of file extension ++! PPONLY-NOPP: 0: input, {{.*}}, f95 ++! PPONLY-NOPP: 1: preprocessor, {0}, f95 ++! PPONLY-NOPP-NOT: {{.*}}: compiler, {{{.*}}}, ir ++! PPONLY-NOPP-NOT: {{.*}}: backend, {{{.*}}}, assembler ++! PPONLY-NOPP-NOT: {{.*}}: assembler, {{{.*}}}, object ++! PPONLY-NOPP-NOT: {{.*}}: linker, {{{.*}}}, image ++ ++! LINK: 0: input, {{.*}}, f95-cpp-input ++! LINK: 1: preprocessor, {0}, f95 ++! LINK: 2: compiler, {1}, ir ++! LINK: 3: backend, {2}, assembler ++! LINK: 4: assembler, {3}, object ++! LINK: 5: linker, {4}, image ++ ++! CONLY: 0: input, {{.*}}, f95-cpp-input ++! CONLY: 1: preprocessor, {0}, f95 ++! CONLY: 2: compiler, {1}, ir ++! CONLY: 3: backend, {2}, assembler ++! CONLY: 4: assembler, {3}, object ++! CONLY-NOT: {{.*}}: linker, {{{.*}}}, image ++ ++! AONLY: 0: input, {{.*}}, f95-cpp-input ++! AONLY: 1: preprocessor, {0}, f95 ++! AONLY: 2: compiler, {1}, ir ++! AONLY: 3: backend, {2}, assembler ++! AONLY-NOT: {{.*}}: assembler, {{{.*}}}, object ++! AONLY-NOT: {{.*}}: linker, {{{.*}}}, image ++ ++! LLONLY: 0: input, {{.*}}, f95-cpp-input ++! LLONLY: 1: preprocessor, {0}, f95 ++! LLONLY: 2: compiler, {1}, ir ++! LLONLY-NOT: {{.*}}: backend, {{{.*}}}, assembler ++! LLONLY-NOT: {{.*}}: assembler, {{{.*}}}, object ++! LLONLY-NOT: {{.*}}: linker, {{{.*}}}, image ++ ++! FLLONLY: 0: input, {{.*}}, f95-cpp-input ++! FLLONLY: 1: preprocessor, {0}, f95 ++! FLLONLY: 2: compiler, {1}, ir ++! FLLONLY-NOT: {{.*}}: backend, {{{.*}}}, assembler ++! FLLONLY-NOT: {{.*}}: assembler, {{{.*}}}, object ++! FLLONLY-NOT: {{.*}}: linker, {{{.*}}}, image ++ ++! SONLY: 0: input, {{.*}}, f95-cpp-input ++! SONLY: 1: preprocessor, {0}, f95 ++! SONLY-NOT: {{.*}}: compiler, {{{.*}}}, ir ++! SONLY-NOT: {{.*}}: backend, {{{.*}}}, assembler ++! SONLY-NOT: {{.*}}: assembler, {{{.*}}}, object ++! SONLY-NOT: {{.*}}: linker, {{{.*}}}, image ++ ++! 
PPONLY: 0: input, {{.*}}, f95-cpp-input ++! PPONLY: 1: preprocessor, {0}, f95 ++! PPONLY-NOT: {{.*}}: compiler, {{{.*}}}, ir ++! PPONLY-NOT: {{.*}}: backend, {{{.*}}}, assembler ++! PPONLY-NOT: {{.*}}: assembler, {{{.*}}}, object ++! PPONLY-NOT: {{.*}}: linker, {{{.*}}}, image ++ ++program hello ++ write(*, *) "Hello" ++end program hello +diff --git a/clang/test/Driver/fortran-preprocessor.f90 b/clang/test/Driver/fortran-preprocessor.f90 +new file mode 100644 +index 000000000000..d9e5620145ac +--- /dev/null ++++ b/clang/test/Driver/fortran-preprocessor.f90 +@@ -0,0 +1,48 @@ ++! REQUIRES: classic_flang ++ ++! -cpp should preprocess as it goes, regardless of input file extension ++! RUN: %flang -cpp -c -DHELLO="hello all" -### %s 2>&1 | FileCheck %s --check-prefixes=ALL,CPP,PP ++! RUN: %flang -cpp -c -DHELLO="hello all" -### -x f95-cpp-input %s 2>&1 | FileCheck %s --check-prefixes=ALL,CPP,PP ++! -E should preprocess then stop, regardless of input file extension ++! RUN: %flang -E -DHELLO="hello all" -### %s 2>&1 | FileCheck %s --check-prefixes=ALL,E,PPONLY ++! RUN: %flang -E -DHELLO="hello all" -### -x f95-cpp-input %s 2>&1 | FileCheck %s --check-prefixes=ALL,E,PPONLY ++! -cpp and -E are redundant ++! RUN: %flang -E -cpp -DHELLO="hello all" -### %s 2>&1 | FileCheck %s --check-prefixes=ALL,E,PPONLY ++ ++! Don't link when given linker input ++! RUN: %flang -E -cpp -Wl,-rpath=blah -### %s 2>&1 | FileCheck %s --check-prefixes=ALL,E,PPONLY ++ ++! Explicitly test this nonsence case causing a bug with LLVM 13/14 ++! RUN: %flang -E -traditional-cpp -DHELLO="hello all" -x f95-cpp-input -### %s 2>&1 | FileCheck %s --check-prefixes=ALL,E,PPONLY ++ ++! Test -save-temps does not break things (same codepath as -traditional-cpp bug above) ++! RUN: %flang -E -DHELLO="hello all" -save-temps -### %s 2>&1 | FileCheck %s --check-prefixes=ALL,E,PPONLY ++! RUN: %flang -E -DHELLO="hello all" -save-temps -### -x f95-cpp-input %s 2>&1 | FileCheck %s --check-prefixes=ALL,E,PPONLY ++! RUN: %flang -cpp -c -DHELLO="hello all" -save-temps -### %s 2>&1 | FileCheck %s --check-prefixes=ALL,CPP,PP ++! RUN: %flang -cpp -c -DHELLO="hello all" -save-temps -### -x f95-cpp-input %s 2>&1 | FileCheck %s --check-prefixes=ALL,CPP,PP ++ ++! Test for the correct cmdline flags ++! Consume up to flang1 line ++! ALL-LABEL: "{{.*}}flang1" ++! CPP-NOT: "-es" ++! CPP: "-preprocess" ++! CPP-NOT: "-es" ++ ++! E-DAG: "-es" ++! E-DAG: "-preprocess" ++ ++! CPP should continue to build object ++! PP: "{{.*}}flang2" ++! PPONLY-NOT: "{{.*}}flang2" ++ ++! flang1 and flang2 should only be called at most once! ++! ALL-NOT: "{{.*}}flang1" ++! ALL-NOT: "{{.*}}flang2" ++ ++! These commands should never call a linker! ++! ALL-NOT: "{{.*}}ld" ++ ++program hello ++ write(*, *) HELLO ++end program hello ++ +diff --git a/clang/test/Driver/fortran.f95 b/clang/test/Driver/fortran.f95 +index db3ff2da17e8..e364180a1f9c 100644 +--- a/clang/test/Driver/fortran.f95 ++++ b/clang/test/Driver/fortran.f95 +@@ -1,6 +1,8 @@ + ! Check that the clang driver can invoke gcc to compile Fortran when in + ! --driver-mode=clang. This is legacy behaviour - see also --driver-mode=flang. + ++! UNSUPPORTED: classic_flang ++ + ! RUN: %clang -target x86_64-unknown-linux-gnu -integrated-as -c %s -### 2>&1 \ + ! RUN: | FileCheck --check-prefix=CHECK-OBJECT %s + ! 
CHECK-OBJECT: gcc +diff --git a/clang/test/Driver/gfortran.f90 b/clang/test/Driver/gfortran.f90 +index c985428650ec..1276703b1f6e 100644 +--- a/clang/test/Driver/gfortran.f90 ++++ b/clang/test/Driver/gfortran.f90 +@@ -1,4 +1,5 @@ + ! XFAIL: * ++! UNSUPPORTED: classic_flang + ! Test that Clang can forward all of the flags which are documented as + ! being supported by gfortran to GCC when falling back to GCC for + ! a fortran input file. +diff --git a/clang/test/Driver/lit.local.cfg b/clang/test/Driver/lit.local.cfg +index 6370e9f92d89..5ec1f9a8fc71 100644 +--- a/clang/test/Driver/lit.local.cfg ++++ b/clang/test/Driver/lit.local.cfg +@@ -12,6 +12,9 @@ config.suffixes = + ".f90", + ".F90", + ".f95", ++ '.F95', ++ '.f', ++ '.F', + ".cu", + ".rs", + ".cl", +diff --git a/clang/test/lit.cfg.py b/clang/test/lit.cfg.py +index 68e038475a5c..bb6fb1ae5146 100644 +--- a/clang/test/lit.cfg.py ++++ b/clang/test/lit.cfg.py +@@ -395,3 +395,6 @@ if "system-aix" in config.available_features: + # possibly be present in system and user configuration files, so disable + # default configs for the test runs. + config.environment"CLANG_NO_DEFAULT_CONFIG" = "1" ++ ++if config.use_classic_flang: ++ config.available_features.add("classic_flang") +diff --git a/clang/test/lit.site.cfg.py.in b/clang/test/lit.site.cfg.py.in +index ef75770a2c3c..bc278740e0d0 100644 +--- a/clang/test/lit.site.cfg.py.in ++++ b/clang/test/lit.site.cfg.py.in +@@ -41,6 +41,7 @@ config.llvm_external_lit = path(r"@LLVM_EXTERNAL_LIT@") + config.standalone_build = @CLANG_BUILT_STANDALONE@ + config.ppc_linux_default_ieeelongdouble = @PPC_LINUX_DEFAULT_IEEELONGDOUBLE@ + config.have_llvm_driver = @LLVM_TOOL_LLVM_DRIVER_BUILD@ ++config.use_classic_flang = @LLVM_ENABLE_CLASSIC_FLANG@ + + import lit.llvm + lit.llvm.initialize(lit_config, config) +diff --git a/clang/tools/driver/CMakeLists.txt b/clang/tools/driver/CMakeLists.txt +index 2182486f93a5..c269561fde69 100644 +--- a/clang/tools/driver/CMakeLists.txt ++++ b/clang/tools/driver/CMakeLists.txt +@@ -59,7 +59,7 @@ endif() + add_dependencies(clang clang-resource-headers) + + if(NOT CLANG_LINKS_TO_CREATE) +- set(CLANG_LINKS_TO_CREATE clang++ clang-cl clang-cpp) ++ set(CLANG_LINKS_TO_CREATE clang++ clang-cl clang-cpp flang) + endif() + + if (CLANG_ENABLE_HLSL) +-- +2.24.3 (Apple Git-128) +
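The new driver tests above are guarded by the "classic_flang" lit feature, which clang/test/CMakeLists.txt and lit.site.cfg.py.in derive from the LLVM_ENABLE_CLASSIC_FLANG CMake setting. Below is a minimal sketch of how a build with these patches applied might be configured so the gated tests actually run; the build directory name, generator, and project list are illustrative assumptions and are not part of this change set:

    # assumes an llvm-project tree with the patches above already applied
    cmake -S llvm -B build -G Ninja \
          -DLLVM_ENABLE_PROJECTS=clang \
          -DLLVM_ENABLE_CLASSIC_FLANG=ON
    ninja -C build check-clang                        # full clang lit suite
    ./build/bin/llvm-lit -sv clang/test/Driver/flang  # narrower rerun of the flang driver tests

When LLVM_ENABLE_CLASSIC_FLANG is left off, config.use_classic_flang evaluates false, the "classic_flang" feature is never added, and tests marked "REQUIRES: classic_flang" are reported as unsupported instead of failing.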
View file
_service:tar_scm:0012-Fix-declaration-definition-mismatch-for-classic-flang.patch
Added
@@ -0,0 +1,326 @@ +From bf04bad2c43a69730e1cdf4dcd2d06ab786bda33 Mon Sep 17 00:00:00 2001 +From: luofeng14 <luofeng13@huawei.com> +Date: Wed, 17 Apr 2024 10:49:19 +0800 +Subject: PATCH Fix declaration definition mismatch for classic flang + +--- + .../clang/Basic/DiagnosticDriverKinds.td | 4 ++++ + clang/include/clang/Basic/MacroBuilder.h | 7 ++++++- + clang/include/clang/Basic/Sanitizers.def | 2 ++ + clang/include/clang/Driver/Options.td | 1 - + clang/include/clang/Frontend/Utils.h | 10 ++++++++- + clang/lib/CodeGen/CGDebugInfo.cpp | 21 +++++++++++++++---- + clang/lib/Driver/Driver.cpp | 1 - + clang/lib/Frontend/InitPreprocessor.cpp | 21 +++++++++++++++++++ + .../test/CodeGen/libpgmath-logfun-aarch64.ll | 1 + + clang/test/CodeGen/libpgmath-logfun-x86_64.ll | 2 +- + clang/test/Driver/autocomplete.c | 2 +- + .../flang/classic-flang-vscale-mbits.f95 | 2 ++ + .../Driver/flang/classic-flang-vscale.f95 | 2 ++ + clang/tools/driver/CMakeLists.txt | 6 +++++- + 14 files changed, 71 insertions(+), 11 deletions(-) + +diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td +index 09a1949d7596..62a60e65c6a4 100644 +--- a/clang/include/clang/Basic/DiagnosticDriverKinds.td ++++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td +@@ -131,10 +131,12 @@ def err_drv_invalid_linker_name : Error< + "invalid linker name in argument '%0'">; + def err_drv_invalid_rtlib_name : Error< + "invalid runtime library name in argument '%0'">; ++#ifdef ENABLE_CLASSIC_FLANG + def err_drv_invalid_allocatable_mode : Error< + "invalid semantic mode for assignments to allocatables in argument '%0'">; + def err_drv_unsupported_fixed_line_length : Error< + "unsupported fixed-format line length in argument '%0'">; ++#endif + def err_drv_unsupported_rtlib_for_platform : Error< + "unsupported runtime library '%0' for platform '%1'">; + def err_drv_invalid_unwindlib_name : Error< +@@ -358,8 +360,10 @@ def err_drv_negative_columns : Error< + "invalid value '%1' in '%0', value must be 'none' or a positive integer">; + def err_drv_small_columns : Error< + "invalid value '%1' in '%0', value must be '%2' or greater">; ++#ifdef ENABLE_CLASSIC_FLANG + def err_drv_clang_unsupported_minfo_arg : Error< + "'%0' option does not support '%1' value">; ++#endif + + def err_drv_invalid_malign_branch_EQ : Error< + "invalid argument '%0' to -malign-branch=; each element must be one of: %1">; +diff --git a/clang/include/clang/Basic/MacroBuilder.h b/clang/include/clang/Basic/MacroBuilder.h +index bfc5e38c15e3..54e9ea643394 100644 +--- a/clang/include/clang/Basic/MacroBuilder.h ++++ b/clang/include/clang/Basic/MacroBuilder.h +@@ -24,10 +24,15 @@ class MacroBuilder { + raw_ostream &Out; + public: + MacroBuilder(raw_ostream &Output) : Out(Output) {} ++ #ifdef ENABLE_CLASSIC_FLANG + virtual ~MacroBuilder() {} +- ++ #endif + /// Append a \#define line for macro of the form "\#define Name Value\n". 
++ #ifdef ENABLE_CLASSIC_FLANG + virtual void defineMacro(const Twine &Name, const Twine &Value = "1") { ++ #else ++ void defineMacro(const Twine &Name, const Twine &Value = "1") { ++ #endif + Out << "#define " << Name << ' ' << Value << '\n'; + } + +diff --git a/clang/include/clang/Basic/Sanitizers.def b/clang/include/clang/Basic/Sanitizers.def +index 4f0a2bf332ef..a8fcba677ee5 100644 +--- a/clang/include/clang/Basic/Sanitizers.def ++++ b/clang/include/clang/Basic/Sanitizers.def +@@ -107,8 +107,10 @@ SANITIZER("signed-integer-overflow", SignedIntegerOverflow) + SANITIZER("unreachable", Unreachable) + SANITIZER("vla-bound", VLABound) + SANITIZER("vptr", Vptr) ++#ifdef ENABLE_CLASSIC_FLANG + // fortran contiguous pointer checks + SANITIZER("discontiguous", Discontiguous) ++#endif + + // IntegerSanitizer + SANITIZER("unsigned-integer-overflow", UnsignedIntegerOverflow) +diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td +index 365285966a67..dab35f22e4ac 100644 +--- a/clang/include/clang/Driver/Options.td ++++ b/clang/include/clang/Driver/Options.td +@@ -5473,7 +5473,6 @@ defm stack_arrays : BoolOptionWithoutMarshalling<"f", "stack-arrays", + defm loop_versioning : BoolOptionWithoutMarshalling<"f", "version-loops-for-stride", + PosFlag<SetTrue, , "Create unit-strided versions of loops">, + NegFlag<SetFalse, , "Do not create unit-strided loops (default)">>; +- + } // let Flags = FC1Option, FlangOption, FlangOnlyOption + + #ifdef ENABLE_CLASSIC_FLANG +diff --git a/clang/include/clang/Frontend/Utils.h b/clang/include/clang/Frontend/Utils.h +index c51c617c1379..4ac76dd348c6 100644 +--- a/clang/include/clang/Frontend/Utils.h ++++ b/clang/include/clang/Frontend/Utils.h +@@ -17,10 +17,12 @@ + #include "clang/Basic/LLVM.h" + #include "clang/Driver/OptionUtils.h" + #include "clang/Frontend/DependencyOutputOptions.h" ++#ifdef ENABLE_CLASSIC_FLANG + #include "clang/Basic/TargetInfo.h" ++#include "llvm/ADT/StringExtras.h" ++#endif + #include "llvm/ADT/ArrayRef.h" + #include "llvm/ADT/IntrusiveRefCntPtr.h" +-#include "llvm/ADT/StringExtras.h" + #include "llvm/ADT/StringMap.h" + #include "llvm/ADT/StringRef.h" + #include "llvm/ADT/StringSet.h" +@@ -33,11 +35,13 @@ + #include <utility> + #include <vector> + ++#ifdef ENABLE_CLASSIC_FLANG + namespace llvm { + + class StringRef; + + } // namespace llvm ++#endif + + namespace clang { + +@@ -47,7 +51,9 @@ class CompilerInvocation; + class DiagnosticsEngine; + class ExternalSemaSource; + class FrontendOptions; ++#ifdef ENABLE_CLASSIC_FLANG + class MacroBuilder; ++#endif + class PCHContainerReader; + class Preprocessor; + class PreprocessorOptions; +@@ -63,6 +69,7 @@ void InitializePreprocessor(Preprocessor &PP, const PreprocessorOptions &PPOpts, + void DoPrintPreprocessedInput(Preprocessor &PP, raw_ostream *OS, + const PreprocessorOutputOptions &Opts); + ++#ifdef ENABLE_CLASSIC_FLANG + /// DefineTypeSize - Emit a macro to the predefines buffer that declares a macro + /// named MacroName with the max value for a type with width 'TypeWidth' a + /// signedness of 'isSigned' and with a value suffix of 'ValSuffix' (e.g. LL). +@@ -85,6 +92,7 @@ static void DefineTypeSize(const Twine &MacroName, TargetInfo::IntType Ty, + DefineTypeSize(MacroName, TI.getTypeWidth(Ty), TI.getTypeConstantSuffix(Ty), + TI.isTypeSigned(Ty), Builder); + } ++#endif + + /// An interface for collecting the dependencies of a compilation. 
Users should + /// use \c attachToPreprocessor and \c attachToASTReader to get all of the +diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp +index 3b140860cce4..30d62ae8faf6 100644 +--- a/clang/lib/CodeGen/CGDebugInfo.cpp ++++ b/clang/lib/CodeGen/CGDebugInfo.cpp +@@ -3981,7 +3981,10 @@ CGDebugInfo::getGlobalVariableForwardDeclaration(const VarDecl *VD) { + auto *GV = DBuilder.createTempGlobalVariableFwdDecl( + DContext, Name, LinkageName, Unit, Line, getOrCreateType(T, Unit), + !VD->isExternallyVisible(), nullptr, TemplateParameters, +- llvm::DINode::FlagZero, Align); ++#ifdef ENABLE_CLASSIC_FLANG ++ llvm::DINode::FlagZero, ++#endif ++ Align); + FwdDeclReplaceMap.emplace_back( + std::piecewise_construct, + std::make_tuple(cast<VarDecl>(VD->getCanonicalDecl())), +@@ -5468,7 +5471,10 @@ void CGDebugInfo::EmitGlobalVariable(llvm::GlobalVariable *Var, + Var->hasLocalLinkage(), true, + Expr.empty() ? nullptr : DBuilder.createExpression(Expr), + getOrCreateStaticDataMemberDeclarationOrNull(D), TemplateParameters, +- llvm::DINode::FlagZero, Align, Annotations); ++#ifdef ENABLE_CLASSIC_FLANG ++ llvm::DINode::FlagZero, ++#endif ++ Align, Annotations); + Var->addDebugInfo(GVE); + } + DeclCacheD->getCanonicalDecl().reset(GVE); +@@ -5564,7 +5570,11 @@ void CGDebugInfo::EmitGlobalVariable(const ValueDecl *VD, const APValue &Init) { + GV.reset(DBuilder.createGlobalVariableExpression( + DContext, Name, StringRef(), Unit, getLineNumber(VD->getLocation()), Ty, + true, true, InitExpr, getOrCreateStaticDataMemberDeclarationOrNull(VarD), +- TemplateParameters, llvm::DINode::FlagZero, Align)); ++ TemplateParameters, ++#ifdef ENABLE_CLASSIC_FLANG ++ llvm::DINode::FlagZero, ++#endif ++ Align)); + } + + void CGDebugInfo::EmitExternalVariable(llvm::GlobalVariable *Var, +@@ -5582,7 +5592,10 @@ void CGDebugInfo::EmitExternalVariable(llvm::GlobalVariable *Var, + llvm::DIGlobalVariableExpression *GVE = + DBuilder.createGlobalVariableExpression( + DContext, Name, StringRef(), Unit, getLineNumber(D->getLocation()), +- Ty, false, false, nullptr, nullptr, nullptr, llvm::DINode::FlagZero, ++ Ty, false, false, nullptr, nullptr, nullptr, ++#ifdef ENABLE_CLASSIC_FLANG ++ llvm::DINode::FlagZero, ++#endif + Align); + Var->addDebugInfo(GVE); + } +diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp +index 8bfee5928fe5..819d7703b2e7 100644 +--- a/clang/lib/Driver/Driver.cpp ++++ b/clang/lib/Driver/Driver.cpp +@@ -377,7 +377,6 @@ phases::ID Driver::getFinalPhase(const DerivedArgList &DAL, + (PhaseArg = DAL.getLastArg(options::OPT_fmodule_header, + options::OPT_fmodule_header_EQ))) { + FinalPhase = phases::Precompile; +- + // -{fsyntax-only,-analyze,emit-ast} only run up to the compiler. + } else if ((PhaseArg = DAL.getLastArg(options::OPT_fsyntax_only)) || + (PhaseArg = DAL.getLastArg(options::OPT_print_supported_cpus)) || +diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp +index 7b01cb01a45e..05682ae9f135 100644 +--- a/clang/lib/Frontend/InitPreprocessor.cpp ++++ b/clang/lib/Frontend/InitPreprocessor.cpp +@@ -161,6 +161,27 @@ static void DefineFloatMacros(MacroBuilder &Builder, StringRef Prefix, + Builder.defineMacro(DefPrefix + "MIN__", Twine(Min)+Ext); + } + ++#ifndef ENABLE_CLASSIC_FLANG ++/// DefineTypeSize - Emit a macro to the predefines buffer that declares a macro ++/// named MacroName with the max value for a type with width 'TypeWidth' a ++/// signedness of 'isSigned' and with a value suffix of 'ValSuffix' (e.g. LL). 
++static void DefineTypeSize(const Twine &MacroName, unsigned TypeWidth,
++ StringRef ValSuffix, bool isSigned,
++ MacroBuilder &Builder) {
++ llvm::APInt MaxVal = isSigned ? llvm::APInt::getSignedMaxValue(TypeWidth)
++ : llvm::APInt::getMaxValue(TypeWidth);
++ Builder.defineMacro(MacroName, toString(MaxVal, 10, isSigned) + ValSuffix);
++}
++
++/// DefineTypeSize - An overloaded helper that uses TargetInfo to determine
++/// the width, suffix, and signedness of the given type
++static void DefineTypeSize(const Twine &MacroName, TargetInfo::IntType Ty,
++ const TargetInfo &TI, MacroBuilder &Builder) {
++ DefineTypeSize(MacroName, TI.getTypeWidth(Ty), TI.getTypeConstantSuffix(Ty),
++ TI.isTypeSigned(Ty), Builder);
++}
++#endif
++
+ static void DefineFmt(const Twine &Prefix, TargetInfo::IntType Ty,
+ const TargetInfo &TI, MacroBuilder &Builder) {
+ bool IsSigned = TI.isTypeSigned(Ty);
+diff --git a/clang/test/CodeGen/libpgmath-logfun-aarch64.ll b/clang/test/CodeGen/libpgmath-logfun-aarch64.ll
+index 141fed29ccd1..be05a472016c 100644
+--- a/clang/test/CodeGen/libpgmath-logfun-aarch64.ll
++++ b/clang/test/CodeGen/libpgmath-logfun-aarch64.ll
+@@ -1,3 +1,4 @@
++; REQUIRES: classic_flang
+ ; REQUIRES: aarch64-registered-target
+
+ ; RUN: %clang -target aarch64-unknown-linux-gnu -Ofast -S %s -o - | FileCheck %s
+diff --git a/clang/test/CodeGen/libpgmath-logfun-x86_64.ll b/clang/test/CodeGen/libpgmath-logfun-x86_64.ll
+index 3ce1d910947f..c89e538f34e3 100644
+--- a/clang/test/CodeGen/libpgmath-logfun-x86_64.ll
++++ b/clang/test/CodeGen/libpgmath-logfun-x86_64.ll
+@@ -1,5 +1,5 @@
+ ; REQUIRES: x86-registered-target
+-
++; REQUIRES: classic_flang
+ ; RUN: %clang -target x86_64-unknown-linux-gnu -msse -Ofast -S %s -o - | FileCheck %s
+
+ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+diff --git a/clang/test/Driver/autocomplete.c b/clang/test/Driver/autocomplete.c
+index 8c4921c9dbd5..d6f57708b67e 100644
+--- a/clang/test/Driver/autocomplete.c
++++ b/clang/test/Driver/autocomplete.c
+@@ -85,7 +85,7 @@
+ // FVECLIBALL-NEXT: libmvec
+ // FVECLIBALL-NEXT: MASSV
+ // FVECLIBALL-NEXT: none
+-// FVECLIBALL: SLEEF
++// FVECLIBALL-NEXT: SLEEF
+ // FVECLIBALL-NEXT: SVML
+ // RUN: %clang --autocomplete=-fshow-overloads= | FileCheck %s -check-prefix=FSOVERALL
+ // FSOVERALL: all
+diff --git a/clang/test/Driver/flang/classic-flang-vscale-mbits.f95 b/clang/test/Driver/flang/classic-flang-vscale-mbits.f95
+index f0ed43aa027a..089993be7b04 100644
+--- a/clang/test/Driver/flang/classic-flang-vscale-mbits.f95
++++ b/clang/test/Driver/flang/classic-flang-vscale-mbits.f95
+@@ -1,3 +1,5 @@
++! REQUIRES: classic_flang
++
+ // RUN: %clang --driver-mode=flang -### -S --target=aarch64 -march=armv8-a+sve -msve-vector-bits=128 %s 2>&1 | FileCheck -check-prefix=CHECK-SVE-128 %s
+ // RUN: %clang --driver-mode=flang -### -S --target=aarch64 -march=armv8-a+sve -msve-vector-bits=128+ %s 2>&1 | FileCheck -check-prefix=CHECK-SVE-128PLUS %s
+ // RUN: %clang --driver-mode=flang -### -S --target=aarch64 -march=armv8-a+sve -msve-vector-bits=256 %s 2>&1 | FileCheck -check-prefix=CHECK-SVE-256 %s
+diff --git a/clang/test/Driver/flang/classic-flang-vscale.f95 b/clang/test/Driver/flang/classic-flang-vscale.f95
+index 8110be594db5..b8496975758f 100644
+--- a/clang/test/Driver/flang/classic-flang-vscale.f95
++++ b/clang/test/Driver/flang/classic-flang-vscale.f95
+@@ -1,3 +1,5 @@
++! REQUIRES: classic_flang
++
+ // RUN: %clang --driver-mode=flang -### -S --target=aarch64 -march=armv8-a %s 2>&1 | FileCheck -check-prefix=CHECK-NEON %s
+ // RUN: %clang --driver-mode=flang -### -S --target=aarch64 -march=armv8-a+sve %s 2>&1 | FileCheck -check-prefix=CHECK-SVE %s
+ // RUN: %clang --driver-mode=flang -### -S --target=aarch64 -march=armv8-a+sve2 %s 2>&1 | FileCheck -check-prefix=CHECK-SVE2 %s
+diff --git a/clang/tools/driver/CMakeLists.txt b/clang/tools/driver/CMakeLists.txt
+index c269561fde69..465b05c9c0e9 100644
+--- a/clang/tools/driver/CMakeLists.txt
++++ b/clang/tools/driver/CMakeLists.txt
+@@ -59,7 +59,11 @@ endif()
+ add_dependencies(clang clang-resource-headers)
+
+ if(NOT CLANG_LINKS_TO_CREATE)
+- set(CLANG_LINKS_TO_CREATE clang++ clang-cl clang-cpp flang)
++ if (LLVM_ENABLE_CLASSIC_FLANG)
++ set(CLANG_LINKS_TO_CREATE clang++ clang-cl clang-cpp flang)
++ else()
++ set(CLANG_LINKS_TO_CREATE clang++ clang-cl clang-cpp)
++ endif()
+ endif()
+
+ if (CLANG_ENABLE_HLSL)
+--
+2.24.3 (Apple Git-128)
+
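A pattern that recurs throughout this patch is wrapping the classic-flang-only declarations in #ifdef ENABLE_CLASSIC_FLANG, so that a build without -DLLVM_ENABLE_CLASSIC_FLANG=ON (see the CMake guard in the last hunk above) keeps the upstream signatures unchanged. The MacroBuilder.h hunk is the clearest instance: with classic flang enabled, defineMacro() becomes virtual and the class gains a virtual destructor so a flang-specific builder can override it; otherwise the plain non-virtual member is kept. The standalone sketch below illustrates only that guard structure and is not part of the patch: std::ostream and std::string stand in for llvm::raw_ostream and llvm::Twine, and the assumption that the CMake option maps to an ENABLE_CLASSIC_FLANG preprocessor define is inferred from the hunks above, not shown here.

// Standalone sketch of the ENABLE_CLASSIC_FLANG guard pattern from the
// MacroBuilder.h hunk; names mirror the patch, everything else is illustrative.
#include <iostream>
#include <string>

class MacroBuilder {
  std::ostream &Out;

public:
  explicit MacroBuilder(std::ostream &Output) : Out(Output) {}

#ifdef ENABLE_CLASSIC_FLANG
  // Classic-flang builds may subclass MacroBuilder and override defineMacro(),
  // so the base class needs a virtual destructor.
  virtual ~MacroBuilder() {}
#endif

  // Append a "#define Name Value" line to the output stream.
#ifdef ENABLE_CLASSIC_FLANG
  virtual void defineMacro(const std::string &Name,
                           const std::string &Value = "1") {
#else
  void defineMacro(const std::string &Name, const std::string &Value = "1") {
#endif
    Out << "#define " << Name << ' ' << Value << '\n';
  }
};

int main() {
  MacroBuilder Builder(std::cout);
  Builder.defineMacro("__SKETCH_ONLY__");        // prints: #define __SKETCH_ONLY__ 1
  Builder.defineMacro("__SKETCH_VALUE__", "42"); // prints: #define __SKETCH_VALUE__ 42
  return 0;
}

Compiling the sketch with -DENABLE_CLASSIC_FLANG selects the virtual variant; compiling without it selects the plain one, so the same translation unit stays source-compatible in both configurations.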