Projects
home:dingli:branches:openEuler:24.09-openjdk
openjdk-11
_service:tar_scm:8231441-2-AArch64-Initial-SVE-...
Sign Up
Log In
Username
Password
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File _service:tar_scm:8231441-2-AArch64-Initial-SVE-backend-support.patch of Package openjdk-11
diff --git a/src/hotspot/cpu/aarch64/aarch64.ad b/src/hotspot/cpu/aarch64/aarch64.ad index b64919a62..64985e498 100644 --- a/src/hotspot/cpu/aarch64/aarch64.ad +++ b/src/hotspot/cpu/aarch64/aarch64.ad @@ -69,7 +69,7 @@ register %{ // // r0-r7,r10-r26 volatile (caller save) // r27-r32 system (no save, no allocate) -// r8-r9 invisible to the allocator (so we can use them as scratch regs) +// r8-r9 non-allocatable (so we can use them as scratch regs) // // as regards Java usage. we don't use any callee save registers // because this makes it difficult to de-optimise a frame (see comment @@ -94,6 +94,10 @@ reg_def R6 ( SOC, SOC, Op_RegI, 6, r6->as_VMReg() ); reg_def R6_H ( SOC, SOC, Op_RegI, 6, r6->as_VMReg()->next() ); reg_def R7 ( SOC, SOC, Op_RegI, 7, r7->as_VMReg() ); reg_def R7_H ( SOC, SOC, Op_RegI, 7, r7->as_VMReg()->next() ); +reg_def R8 ( NS, SOC, Op_RegI, 8, r8->as_VMReg() ); // rscratch1, non-allocatable +reg_def R8_H ( NS, SOC, Op_RegI, 8, r8->as_VMReg()->next() ); +reg_def R9 ( NS, SOC, Op_RegI, 9, r9->as_VMReg() ); // rscratch2, non-allocatable +reg_def R9_H ( NS, SOC, Op_RegI, 9, r9->as_VMReg()->next() ); reg_def R10 ( SOC, SOC, Op_RegI, 10, r10->as_VMReg() ); reg_def R10_H ( SOC, SOC, Op_RegI, 10, r10->as_VMReg()->next()); reg_def R11 ( SOC, SOC, Op_RegI, 11, r11->as_VMReg() ); @@ -140,7 +144,7 @@ reg_def R31 ( NS, NS, Op_RegI, 31, r31_sp->as_VMReg() ); // sp reg_def R31_H ( NS, NS, Op_RegI, 31, r31_sp->as_VMReg()->next()); // ---------------------------- -// Float/Double Registers +// Float/Double/Vector Registers // ---------------------------- // Double Registers @@ -161,165 +165,317 @@ reg_def R31_H ( NS, NS, Op_RegI, 31, r31_sp->as_VMReg()->next()); // the platform ABI treats v8-v15 as callee save). float registers // v16-v31 are SOC as per the platform spec - reg_def V0 ( SOC, SOC, Op_RegF, 0, v0->as_VMReg() ); - reg_def V0_H ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next() ); - reg_def V0_J ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next(2) ); - reg_def V0_K ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next(3) ); - - reg_def V1 ( SOC, SOC, Op_RegF, 1, v1->as_VMReg() ); - reg_def V1_H ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next() ); - reg_def V1_J ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next(2) ); - reg_def V1_K ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next(3) ); - - reg_def V2 ( SOC, SOC, Op_RegF, 2, v2->as_VMReg() ); - reg_def V2_H ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next() ); - reg_def V2_J ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next(2) ); - reg_def V2_K ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next(3) ); - - reg_def V3 ( SOC, SOC, Op_RegF, 3, v3->as_VMReg() ); - reg_def V3_H ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next() ); - reg_def V3_J ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next(2) ); - reg_def V3_K ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next(3) ); - - reg_def V4 ( SOC, SOC, Op_RegF, 4, v4->as_VMReg() ); - reg_def V4_H ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next() ); - reg_def V4_J ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next(2) ); - reg_def V4_K ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next(3) ); - - reg_def V5 ( SOC, SOC, Op_RegF, 5, v5->as_VMReg() ); - reg_def V5_H ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next() ); - reg_def V5_J ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next(2) ); - reg_def V5_K ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next(3) ); - - reg_def V6 ( SOC, SOC, Op_RegF, 6, v6->as_VMReg() ); - reg_def V6_H ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next() ); - reg_def V6_J ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next(2) ); - reg_def V6_K ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next(3) ); - - reg_def V7 ( SOC, SOC, Op_RegF, 7, v7->as_VMReg() ); - reg_def V7_H ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next() ); - reg_def V7_J ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next(2) ); - reg_def V7_K ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next(3) ); - - reg_def V8 ( SOC, SOE, Op_RegF, 8, v8->as_VMReg() ); - reg_def V8_H ( SOC, SOE, Op_RegF, 8, v8->as_VMReg()->next() ); - reg_def V8_J ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next(2) ); - reg_def V8_K ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next(3) ); - - reg_def V9 ( SOC, SOE, Op_RegF, 9, v9->as_VMReg() ); - reg_def V9_H ( SOC, SOE, Op_RegF, 9, v9->as_VMReg()->next() ); - reg_def V9_J ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next(2) ); - reg_def V9_K ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next(3) ); - - reg_def V10 ( SOC, SOE, Op_RegF, 10, v10->as_VMReg() ); - reg_def V10_H( SOC, SOE, Op_RegF, 10, v10->as_VMReg()->next() ); - reg_def V10_J( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(2)); - reg_def V10_K( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(3)); - - reg_def V11 ( SOC, SOE, Op_RegF, 11, v11->as_VMReg() ); - reg_def V11_H( SOC, SOE, Op_RegF, 11, v11->as_VMReg()->next() ); - reg_def V11_J( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(2)); - reg_def V11_K( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(3)); - - reg_def V12 ( SOC, SOE, Op_RegF, 12, v12->as_VMReg() ); - reg_def V12_H( SOC, SOE, Op_RegF, 12, v12->as_VMReg()->next() ); - reg_def V12_J( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(2)); - reg_def V12_K( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(3)); - - reg_def V13 ( SOC, SOE, Op_RegF, 13, v13->as_VMReg() ); - reg_def V13_H( SOC, SOE, Op_RegF, 13, v13->as_VMReg()->next() ); - reg_def V13_J( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(2)); - reg_def V13_K( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(3)); - - reg_def V14 ( SOC, SOE, Op_RegF, 14, v14->as_VMReg() ); - reg_def V14_H( SOC, SOE, Op_RegF, 14, v14->as_VMReg()->next() ); - reg_def V14_J( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(2)); - reg_def V14_K( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(3)); - - reg_def V15 ( SOC, SOE, Op_RegF, 15, v15->as_VMReg() ); - reg_def V15_H( SOC, SOE, Op_RegF, 15, v15->as_VMReg()->next() ); - reg_def V15_J( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(2)); - reg_def V15_K( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(3)); - - reg_def V16 ( SOC, SOC, Op_RegF, 16, v16->as_VMReg() ); - reg_def V16_H( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next() ); - reg_def V16_J( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(2)); - reg_def V16_K( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(3)); - - reg_def V17 ( SOC, SOC, Op_RegF, 17, v17->as_VMReg() ); - reg_def V17_H( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next() ); - reg_def V17_J( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(2)); - reg_def V17_K( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(3)); - - reg_def V18 ( SOC, SOC, Op_RegF, 18, v18->as_VMReg() ); - reg_def V18_H( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next() ); - reg_def V18_J( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(2)); - reg_def V18_K( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(3)); - - reg_def V19 ( SOC, SOC, Op_RegF, 19, v19->as_VMReg() ); - reg_def V19_H( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next() ); - reg_def V19_J( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(2)); - reg_def V19_K( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(3)); - - reg_def V20 ( SOC, SOC, Op_RegF, 20, v20->as_VMReg() ); - reg_def V20_H( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next() ); - reg_def V20_J( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(2)); - reg_def V20_K( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(3)); - - reg_def V21 ( SOC, SOC, Op_RegF, 21, v21->as_VMReg() ); - reg_def V21_H( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next() ); - reg_def V21_J( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(2)); - reg_def V21_K( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(3)); - - reg_def V22 ( SOC, SOC, Op_RegF, 22, v22->as_VMReg() ); - reg_def V22_H( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next() ); - reg_def V22_J( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(2)); - reg_def V22_K( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(3)); - - reg_def V23 ( SOC, SOC, Op_RegF, 23, v23->as_VMReg() ); - reg_def V23_H( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next() ); - reg_def V23_J( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(2)); - reg_def V23_K( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(3)); - - reg_def V24 ( SOC, SOC, Op_RegF, 24, v24->as_VMReg() ); - reg_def V24_H( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next() ); - reg_def V24_J( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(2)); - reg_def V24_K( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(3)); - - reg_def V25 ( SOC, SOC, Op_RegF, 25, v25->as_VMReg() ); - reg_def V25_H( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next() ); - reg_def V25_J( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(2)); - reg_def V25_K( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(3)); - - reg_def V26 ( SOC, SOC, Op_RegF, 26, v26->as_VMReg() ); - reg_def V26_H( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next() ); - reg_def V26_J( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(2)); - reg_def V26_K( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(3)); - - reg_def V27 ( SOC, SOC, Op_RegF, 27, v27->as_VMReg() ); - reg_def V27_H( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next() ); - reg_def V27_J( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(2)); - reg_def V27_K( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(3)); - - reg_def V28 ( SOC, SOC, Op_RegF, 28, v28->as_VMReg() ); - reg_def V28_H( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next() ); - reg_def V28_J( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(2)); - reg_def V28_K( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(3)); - - reg_def V29 ( SOC, SOC, Op_RegF, 29, v29->as_VMReg() ); - reg_def V29_H( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next() ); - reg_def V29_J( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(2)); - reg_def V29_K( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(3)); - - reg_def V30 ( SOC, SOC, Op_RegF, 30, v30->as_VMReg() ); - reg_def V30_H( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next() ); - reg_def V30_J( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(2)); - reg_def V30_K( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(3)); - - reg_def V31 ( SOC, SOC, Op_RegF, 31, v31->as_VMReg() ); - reg_def V31_H( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next() ); - reg_def V31_J( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(2)); - reg_def V31_K( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(3)); +// For SVE vector registers, we simply extend vector register size to 8 +// 'logical' slots. This is nominally 256 bits but it actually covers +// all possible 'physical' SVE vector register lengths from 128 ~ 2048 +// bits. The 'physical' SVE vector register length is detected during +// startup, so the register allocator is able to identify the correct +// number of bytes needed for an SVE spill/unspill. +// Note that a vector register with 4 slots denotes a 128-bit NEON +// register allowing it to be distinguished from the corresponding SVE +// vector register when the SVE vector length is 128 bits. + +reg_def V0 ( SOC, SOC, Op_RegF, 0, v0->as_VMReg() ); +reg_def V0_H ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next() ); +reg_def V0_J ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next(2) ); +reg_def V0_K ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next(3) ); +reg_def V0_L ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next(4) ); +reg_def V0_M ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next(5) ); +reg_def V0_N ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next(6) ); +reg_def V0_O ( SOC, SOC, Op_RegF, 0, v0->as_VMReg()->next(7) ); + +reg_def V1 ( SOC, SOC, Op_RegF, 1, v1->as_VMReg() ); +reg_def V1_H ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next() ); +reg_def V1_J ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next(2) ); +reg_def V1_K ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next(3) ); +reg_def V1_L ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next(4) ); +reg_def V1_M ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next(5) ); +reg_def V1_N ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next(6) ); +reg_def V1_O ( SOC, SOC, Op_RegF, 1, v1->as_VMReg()->next(7) ); + +reg_def V2 ( SOC, SOC, Op_RegF, 2, v2->as_VMReg() ); +reg_def V2_H ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next() ); +reg_def V2_J ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next(2) ); +reg_def V2_K ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next(3) ); +reg_def V2_L ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next(4) ); +reg_def V2_M ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next(5) ); +reg_def V2_N ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next(6) ); +reg_def V2_O ( SOC, SOC, Op_RegF, 2, v2->as_VMReg()->next(7) ); + +reg_def V3 ( SOC, SOC, Op_RegF, 3, v3->as_VMReg() ); +reg_def V3_H ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next() ); +reg_def V3_J ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next(2) ); +reg_def V3_K ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next(3) ); +reg_def V3_L ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next(4) ); +reg_def V3_M ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next(5) ); +reg_def V3_N ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next(6) ); +reg_def V3_O ( SOC, SOC, Op_RegF, 3, v3->as_VMReg()->next(7) ); + +reg_def V4 ( SOC, SOC, Op_RegF, 4, v4->as_VMReg() ); +reg_def V4_H ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next() ); +reg_def V4_J ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next(2) ); +reg_def V4_K ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next(3) ); +reg_def V4_L ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next(4) ); +reg_def V4_M ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next(5) ); +reg_def V4_N ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next(6) ); +reg_def V4_O ( SOC, SOC, Op_RegF, 4, v4->as_VMReg()->next(7) ); + +reg_def V5 ( SOC, SOC, Op_RegF, 5, v5->as_VMReg() ); +reg_def V5_H ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next() ); +reg_def V5_J ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next(2) ); +reg_def V5_K ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next(3) ); +reg_def V5_L ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next(4) ); +reg_def V5_M ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next(5) ); +reg_def V5_N ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next(6) ); +reg_def V5_O ( SOC, SOC, Op_RegF, 5, v5->as_VMReg()->next(7) ); + +reg_def V6 ( SOC, SOC, Op_RegF, 6, v6->as_VMReg() ); +reg_def V6_H ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next() ); +reg_def V6_J ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next(2) ); +reg_def V6_K ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next(3) ); +reg_def V6_L ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next(4) ); +reg_def V6_M ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next(5) ); +reg_def V6_N ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next(6) ); +reg_def V6_O ( SOC, SOC, Op_RegF, 6, v6->as_VMReg()->next(7) ); + +reg_def V7 ( SOC, SOC, Op_RegF, 7, v7->as_VMReg() ); +reg_def V7_H ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next() ); +reg_def V7_J ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next(2) ); +reg_def V7_K ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next(3) ); +reg_def V7_L ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next(4) ); +reg_def V7_M ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next(5) ); +reg_def V7_N ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next(6) ); +reg_def V7_O ( SOC, SOC, Op_RegF, 7, v7->as_VMReg()->next(7) ); + +reg_def V8 ( SOC, SOE, Op_RegF, 8, v8->as_VMReg() ); +reg_def V8_H ( SOC, SOE, Op_RegF, 8, v8->as_VMReg()->next() ); +reg_def V8_J ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next(2) ); +reg_def V8_K ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next(3) ); +reg_def V8_L ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next(4) ); +reg_def V8_M ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next(5) ); +reg_def V8_N ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next(6) ); +reg_def V8_O ( SOC, SOC, Op_RegF, 8, v8->as_VMReg()->next(7) ); + +reg_def V9 ( SOC, SOE, Op_RegF, 9, v9->as_VMReg() ); +reg_def V9_H ( SOC, SOE, Op_RegF, 9, v9->as_VMReg()->next() ); +reg_def V9_J ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next(2) ); +reg_def V9_K ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next(3) ); +reg_def V9_L ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next(4) ); +reg_def V9_M ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next(5) ); +reg_def V9_N ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next(6) ); +reg_def V9_O ( SOC, SOC, Op_RegF, 9, v9->as_VMReg()->next(7) ); + +reg_def V10 ( SOC, SOE, Op_RegF, 10, v10->as_VMReg() ); +reg_def V10_H ( SOC, SOE, Op_RegF, 10, v10->as_VMReg()->next() ); +reg_def V10_J ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(2) ); +reg_def V10_K ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(3) ); +reg_def V10_L ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(4) ); +reg_def V10_M ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(5) ); +reg_def V10_N ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(6) ); +reg_def V10_O ( SOC, SOC, Op_RegF, 10, v10->as_VMReg()->next(7) ); + +reg_def V11 ( SOC, SOE, Op_RegF, 11, v11->as_VMReg() ); +reg_def V11_H ( SOC, SOE, Op_RegF, 11, v11->as_VMReg()->next() ); +reg_def V11_J ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(2) ); +reg_def V11_K ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(3) ); +reg_def V11_L ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(4) ); +reg_def V11_M ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(5) ); +reg_def V11_N ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(6) ); +reg_def V11_O ( SOC, SOC, Op_RegF, 11, v11->as_VMReg()->next(7) ); + +reg_def V12 ( SOC, SOE, Op_RegF, 12, v12->as_VMReg() ); +reg_def V12_H ( SOC, SOE, Op_RegF, 12, v12->as_VMReg()->next() ); +reg_def V12_J ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(2) ); +reg_def V12_K ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(3) ); +reg_def V12_L ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(4) ); +reg_def V12_M ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(5) ); +reg_def V12_N ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(6) ); +reg_def V12_O ( SOC, SOC, Op_RegF, 12, v12->as_VMReg()->next(7) ); + +reg_def V13 ( SOC, SOE, Op_RegF, 13, v13->as_VMReg() ); +reg_def V13_H ( SOC, SOE, Op_RegF, 13, v13->as_VMReg()->next() ); +reg_def V13_J ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(2) ); +reg_def V13_K ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(3) ); +reg_def V13_L ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(4) ); +reg_def V13_M ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(5) ); +reg_def V13_N ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(6) ); +reg_def V13_O ( SOC, SOC, Op_RegF, 13, v13->as_VMReg()->next(7) ); + +reg_def V14 ( SOC, SOE, Op_RegF, 14, v14->as_VMReg() ); +reg_def V14_H ( SOC, SOE, Op_RegF, 14, v14->as_VMReg()->next() ); +reg_def V14_J ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(2) ); +reg_def V14_K ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(3) ); +reg_def V14_L ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(4) ); +reg_def V14_M ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(5) ); +reg_def V14_N ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(6) ); +reg_def V14_O ( SOC, SOC, Op_RegF, 14, v14->as_VMReg()->next(7) ); + +reg_def V15 ( SOC, SOE, Op_RegF, 15, v15->as_VMReg() ); +reg_def V15_H ( SOC, SOE, Op_RegF, 15, v15->as_VMReg()->next() ); +reg_def V15_J ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(2) ); +reg_def V15_K ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(3) ); +reg_def V15_L ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(4) ); +reg_def V15_M ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(5) ); +reg_def V15_N ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(6) ); +reg_def V15_O ( SOC, SOC, Op_RegF, 15, v15->as_VMReg()->next(7) ); + +reg_def V16 ( SOC, SOC, Op_RegF, 16, v16->as_VMReg() ); +reg_def V16_H ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next() ); +reg_def V16_J ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(2) ); +reg_def V16_K ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(3) ); +reg_def V16_L ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(4) ); +reg_def V16_M ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(5) ); +reg_def V16_N ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(6) ); +reg_def V16_O ( SOC, SOC, Op_RegF, 16, v16->as_VMReg()->next(7) ); + +reg_def V17 ( SOC, SOC, Op_RegF, 17, v17->as_VMReg() ); +reg_def V17_H ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next() ); +reg_def V17_J ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(2) ); +reg_def V17_K ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(3) ); +reg_def V17_L ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(4) ); +reg_def V17_M ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(5) ); +reg_def V17_N ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(6) ); +reg_def V17_O ( SOC, SOC, Op_RegF, 17, v17->as_VMReg()->next(7) ); + +reg_def V18 ( SOC, SOC, Op_RegF, 18, v18->as_VMReg() ); +reg_def V18_H ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next() ); +reg_def V18_J ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(2) ); +reg_def V18_K ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(3) ); +reg_def V18_L ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(4) ); +reg_def V18_M ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(5) ); +reg_def V18_N ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(6) ); +reg_def V18_O ( SOC, SOC, Op_RegF, 18, v18->as_VMReg()->next(7) ); + +reg_def V19 ( SOC, SOC, Op_RegF, 19, v19->as_VMReg() ); +reg_def V19_H ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next() ); +reg_def V19_J ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(2) ); +reg_def V19_K ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(3) ); +reg_def V19_L ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(4) ); +reg_def V19_M ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(5) ); +reg_def V19_N ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(6) ); +reg_def V19_O ( SOC, SOC, Op_RegF, 19, v19->as_VMReg()->next(7) ); + +reg_def V20 ( SOC, SOC, Op_RegF, 20, v20->as_VMReg() ); +reg_def V20_H ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next() ); +reg_def V20_J ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(2) ); +reg_def V20_K ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(3) ); +reg_def V20_L ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(4) ); +reg_def V20_M ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(5) ); +reg_def V20_N ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(6) ); +reg_def V20_O ( SOC, SOC, Op_RegF, 20, v20->as_VMReg()->next(7) ); + +reg_def V21 ( SOC, SOC, Op_RegF, 21, v21->as_VMReg() ); +reg_def V21_H ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next() ); +reg_def V21_J ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(2) ); +reg_def V21_K ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(3) ); +reg_def V21_L ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(4) ); +reg_def V21_M ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(5) ); +reg_def V21_N ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(6) ); +reg_def V21_O ( SOC, SOC, Op_RegF, 21, v21->as_VMReg()->next(7) ); + +reg_def V22 ( SOC, SOC, Op_RegF, 22, v22->as_VMReg() ); +reg_def V22_H ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next() ); +reg_def V22_J ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(2) ); +reg_def V22_K ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(3) ); +reg_def V22_L ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(4) ); +reg_def V22_M ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(5) ); +reg_def V22_N ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(6) ); +reg_def V22_O ( SOC, SOC, Op_RegF, 22, v22->as_VMReg()->next(7) ); + +reg_def V23 ( SOC, SOC, Op_RegF, 23, v23->as_VMReg() ); +reg_def V23_H ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next() ); +reg_def V23_J ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(2) ); +reg_def V23_K ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(3) ); +reg_def V23_L ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(4) ); +reg_def V23_M ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(5) ); +reg_def V23_N ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(6) ); +reg_def V23_O ( SOC, SOC, Op_RegF, 23, v23->as_VMReg()->next(7) ); + +reg_def V24 ( SOC, SOC, Op_RegF, 24, v24->as_VMReg() ); +reg_def V24_H ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next() ); +reg_def V24_J ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(2) ); +reg_def V24_K ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(3) ); +reg_def V24_L ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(4) ); +reg_def V24_M ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(5) ); +reg_def V24_N ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(6) ); +reg_def V24_O ( SOC, SOC, Op_RegF, 24, v24->as_VMReg()->next(7) ); + +reg_def V25 ( SOC, SOC, Op_RegF, 25, v25->as_VMReg() ); +reg_def V25_H ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next() ); +reg_def V25_J ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(2) ); +reg_def V25_K ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(3) ); +reg_def V25_L ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(4) ); +reg_def V25_M ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(5) ); +reg_def V25_N ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(6) ); +reg_def V25_O ( SOC, SOC, Op_RegF, 25, v25->as_VMReg()->next(7) ); + +reg_def V26 ( SOC, SOC, Op_RegF, 26, v26->as_VMReg() ); +reg_def V26_H ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next() ); +reg_def V26_J ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(2) ); +reg_def V26_K ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(3) ); +reg_def V26_L ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(4) ); +reg_def V26_M ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(5) ); +reg_def V26_N ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(6) ); +reg_def V26_O ( SOC, SOC, Op_RegF, 26, v26->as_VMReg()->next(7) ); + +reg_def V27 ( SOC, SOC, Op_RegF, 27, v27->as_VMReg() ); +reg_def V27_H ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next() ); +reg_def V27_J ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(2) ); +reg_def V27_K ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(3) ); +reg_def V27_L ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(4) ); +reg_def V27_M ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(5) ); +reg_def V27_N ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(6) ); +reg_def V27_O ( SOC, SOC, Op_RegF, 27, v27->as_VMReg()->next(7) ); + +reg_def V28 ( SOC, SOC, Op_RegF, 28, v28->as_VMReg() ); +reg_def V28_H ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next() ); +reg_def V28_J ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(2) ); +reg_def V28_K ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(3) ); +reg_def V28_L ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(4) ); +reg_def V28_M ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(5) ); +reg_def V28_N ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(6) ); +reg_def V28_O ( SOC, SOC, Op_RegF, 28, v28->as_VMReg()->next(7) ); + +reg_def V29 ( SOC, SOC, Op_RegF, 29, v29->as_VMReg() ); +reg_def V29_H ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next() ); +reg_def V29_J ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(2) ); +reg_def V29_K ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(3) ); +reg_def V29_L ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(4) ); +reg_def V29_M ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(5) ); +reg_def V29_N ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(6) ); +reg_def V29_O ( SOC, SOC, Op_RegF, 29, v29->as_VMReg()->next(7) ); + +reg_def V30 ( SOC, SOC, Op_RegF, 30, v30->as_VMReg() ); +reg_def V30_H ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next() ); +reg_def V30_J ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(2) ); +reg_def V30_K ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(3) ); +reg_def V30_L ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(4) ); +reg_def V30_M ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(5) ); +reg_def V30_N ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(6) ); +reg_def V30_O ( SOC, SOC, Op_RegF, 30, v30->as_VMReg()->next(7) ); + +reg_def V31 ( SOC, SOC, Op_RegF, 31, v31->as_VMReg() ); +reg_def V31_H ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next() ); +reg_def V31_J ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(2) ); +reg_def V31_K ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(3) ); +reg_def V31_L ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(4) ); +reg_def V31_M ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(5) ); +reg_def V31_N ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(6) ); +reg_def V31_O ( SOC, SOC, Op_RegF, 31, v31->as_VMReg()->next(7) ); + + +// ---------------------------- +// SVE Predicate Registers +// ---------------------------- +reg_def P0 (SOC, SOC, Op_RegVMask, 0, p0->as_VMReg()); +reg_def P1 (SOC, SOC, Op_RegVMask, 1, p1->as_VMReg()); +reg_def P2 (SOC, SOC, Op_RegVMask, 2, p2->as_VMReg()); +reg_def P3 (SOC, SOC, Op_RegVMask, 3, p3->as_VMReg()); +reg_def P4 (SOC, SOC, Op_RegVMask, 4, p4->as_VMReg()); +reg_def P5 (SOC, SOC, Op_RegVMask, 5, p5->as_VMReg()); +reg_def P6 (SOC, SOC, Op_RegVMask, 6, p6->as_VMReg()); +reg_def P7 (SOC, SOC, Op_RegVMask, 7, p7->as_VMReg()); + // ---------------------------- // Special Registers @@ -381,50 +536,64 @@ alloc_class chunk0( R29, R29_H, // fp R30, R30_H, // lr R31, R31_H, // sp + R8, R8_H, // rscratch1 + R9, R9_H, // rscratch2 ); alloc_class chunk1( // no save - V16, V16_H, V16_J, V16_K, - V17, V17_H, V17_J, V17_K, - V18, V18_H, V18_J, V18_K, - V19, V19_H, V19_J, V19_K, - V20, V20_H, V20_J, V20_K, - V21, V21_H, V21_J, V21_K, - V22, V22_H, V22_J, V22_K, - V23, V23_H, V23_J, V23_K, - V24, V24_H, V24_J, V24_K, - V25, V25_H, V25_J, V25_K, - V26, V26_H, V26_J, V26_K, - V27, V27_H, V27_J, V27_K, - V28, V28_H, V28_J, V28_K, - V29, V29_H, V29_J, V29_K, - V30, V30_H, V30_J, V30_K, - V31, V31_H, V31_J, V31_K, + V16, V16_H, V16_J, V16_K, V16_L, V16_M, V16_N, V16_O, + V17, V17_H, V17_J, V17_K, V17_L, V17_M, V17_N, V17_O, + V18, V18_H, V18_J, V18_K, V18_L, V18_M, V18_N, V18_O, + V19, V19_H, V19_J, V19_K, V19_L, V19_M, V19_N, V19_O, + V20, V20_H, V20_J, V20_K, V20_L, V20_M, V20_N, V20_O, + V21, V21_H, V21_J, V21_K, V21_L, V21_M, V21_N, V21_O, + V22, V22_H, V22_J, V22_K, V22_L, V22_M, V22_N, V22_O, + V23, V23_H, V23_J, V23_K, V23_L, V23_M, V23_N, V23_O, + V24, V24_H, V24_J, V24_K, V24_L, V24_M, V24_N, V24_O, + V25, V25_H, V25_J, V25_K, V25_L, V25_M, V25_N, V25_O, + V26, V26_H, V26_J, V26_K, V26_L, V26_M, V26_N, V26_O, + V27, V27_H, V27_J, V27_K, V27_L, V27_M, V27_N, V27_O, + V28, V28_H, V28_J, V28_K, V28_L, V28_M, V28_N, V28_O, + V29, V29_H, V29_J, V29_K, V29_L, V29_M, V29_N, V29_O, + V30, V30_H, V30_J, V30_K, V30_L, V30_M, V30_N, V30_O, + V31, V31_H, V31_J, V31_K, V31_L, V31_M, V31_N, V31_O, // arg registers - V0, V0_H, V0_J, V0_K, - V1, V1_H, V1_J, V1_K, - V2, V2_H, V2_J, V2_K, - V3, V3_H, V3_J, V3_K, - V4, V4_H, V4_J, V4_K, - V5, V5_H, V5_J, V5_K, - V6, V6_H, V6_J, V6_K, - V7, V7_H, V7_J, V7_K, + V0, V0_H, V0_J, V0_K, V0_L, V0_M, V0_N, V0_O, + V1, V1_H, V1_J, V1_K, V1_L, V1_M, V1_N, V1_O, + V2, V2_H, V2_J, V2_K, V2_L, V2_M, V2_N, V2_O, + V3, V3_H, V3_J, V3_K, V3_L, V3_M, V3_N, V3_O, + V4, V4_H, V4_J, V4_K, V4_L, V4_M, V4_N, V4_O, + V5, V5_H, V5_J, V5_K, V5_L, V5_M, V5_N, V5_O, + V6, V6_H, V6_J, V6_K, V6_L, V6_M, V6_N, V6_O, + V7, V7_H, V7_J, V7_K, V7_L, V7_M, V7_N, V7_O, // non-volatiles - V8, V8_H, V8_J, V8_K, - V9, V9_H, V9_J, V9_K, - V10, V10_H, V10_J, V10_K, - V11, V11_H, V11_J, V11_K, - V12, V12_H, V12_J, V12_K, - V13, V13_H, V13_J, V13_K, - V14, V14_H, V14_J, V14_K, - V15, V15_H, V15_J, V15_K, + V8, V8_H, V8_J, V8_K, V8_L, V8_M, V8_N, V8_O, + V9, V9_H, V9_J, V9_K, V9_L, V9_M, V9_N, V9_O, + V10, V10_H, V10_J, V10_K, V10_L, V10_M, V10_N, V10_O, + V11, V11_H, V11_J, V11_K, V11_L, V11_M, V11_N, V11_O, + V12, V12_H, V12_J, V12_K, V12_L, V12_M, V12_N, V12_O, + V13, V13_H, V13_J, V13_K, V13_L, V13_M, V13_N, V13_O, + V14, V14_H, V14_J, V14_K, V14_L, V14_M, V14_N, V14_O, + V15, V15_H, V15_J, V15_K, V15_L, V15_M, V15_N, V15_O, +); + +alloc_class chunk2 ( + P0, + P1, + P2, + P3, + P4, + P5, + P6, + P7, + // Only use P0~P7 here for performance ); -alloc_class chunk2(RFLAGS); +alloc_class chunk3(RFLAGS); //----------Architecture Description Register Classes-------------------------- // Several register classes are automatically defined based upon information in @@ -865,6 +1034,42 @@ reg_class double_reg( V31, V31_H ); +// Class for all SVE vector registers. +reg_class vectora_reg ( + V0, V0_H, V0_J, V0_K, V0_L, V0_M, V0_N, V0_O, + V1, V1_H, V1_J, V1_K, V1_L, V1_M, V1_N, V1_O, + V2, V2_H, V2_J, V2_K, V2_L, V2_M, V2_N, V2_O, + V3, V3_H, V3_J, V3_K, V3_L, V3_M, V3_N, V3_O, + V4, V4_H, V4_J, V4_K, V4_L, V4_M, V4_N, V4_O, + V5, V5_H, V5_J, V5_K, V5_L, V5_M, V5_N, V5_O, + V6, V6_H, V6_J, V6_K, V6_L, V6_M, V6_N, V6_O, + V7, V7_H, V7_J, V7_K, V7_L, V7_M, V7_N, V7_O, + V8, V8_H, V8_J, V8_K, V8_L, V8_M, V8_N, V8_O, + V9, V9_H, V9_J, V9_K, V9_L, V9_M, V9_N, V9_O, + V10, V10_H, V10_J, V10_K, V10_L, V10_M, V10_N, V10_O, + V11, V11_H, V11_J, V11_K, V11_L, V11_M, V11_N, V11_O, + V12, V12_H, V12_J, V12_K, V12_L, V12_M, V12_N, V12_O, + V13, V13_H, V13_J, V13_K, V13_L, V13_M, V13_N, V13_O, + V14, V14_H, V14_J, V14_K, V14_L, V14_M, V14_N, V14_O, + V15, V15_H, V15_J, V15_K, V15_L, V15_M, V15_N, V15_O, + V16, V16_H, V16_J, V16_K, V16_L, V16_M, V16_N, V16_O, + V17, V17_H, V17_J, V17_K, V17_L, V17_M, V17_N, V17_O, + V18, V18_H, V18_J, V18_K, V18_L, V18_M, V18_N, V18_O, + V19, V19_H, V19_J, V19_K, V19_L, V19_M, V19_N, V19_O, + V20, V20_H, V20_J, V20_K, V20_L, V20_M, V20_N, V20_O, + V21, V21_H, V21_J, V21_K, V21_L, V21_M, V21_N, V21_O, + V22, V22_H, V22_J, V22_K, V22_L, V22_M, V22_N, V22_O, + V23, V23_H, V23_J, V23_K, V23_L, V23_M, V23_N, V23_O, + V24, V24_H, V24_J, V24_K, V24_L, V24_M, V24_N, V24_O, + V25, V25_H, V25_J, V25_K, V25_L, V25_M, V25_N, V25_O, + V26, V26_H, V26_J, V26_K, V26_L, V26_M, V26_N, V26_O, + V27, V27_H, V27_J, V27_K, V27_L, V27_M, V27_N, V27_O, + V28, V28_H, V28_J, V28_K, V28_L, V28_M, V28_N, V28_O, + V29, V29_H, V29_J, V29_K, V29_L, V29_M, V29_N, V29_O, + V30, V30_H, V30_J, V30_K, V30_L, V30_M, V30_N, V30_O, + V31, V31_H, V31_J, V31_K, V31_L, V31_M, V31_N, V31_O, +); + // Class for all 64bit vector registers reg_class vectord_reg( V0, V0_H, @@ -1097,6 +1302,31 @@ reg_class v31_reg( V31, V31_H ); +// Class for all SVE predicate registers. +reg_class pr_reg ( + P0, + P1, + P2, + P3, + P4, + P5, + P6, + // P7, non-allocatable, preserved with all elements preset to TRUE. +); + +// Class for SVE governing predicate registers, which are used +// to determine the active elements of a predicated instruction. +reg_class gov_pr ( + P0, + P1, + P2, + P3, + P4, + P5, + P6, + // P7, non-allocatable, preserved with all elements preset to TRUE. +); + // Singleton class for condition codes reg_class int_flags(RFLAGS); @@ -1862,7 +2092,7 @@ int MachEpilogNode::safepoint_offset() const { // Figure out which register class each belongs in: rc_int, rc_float or // rc_stack. -enum RC { rc_bad, rc_int, rc_float, rc_stack }; +enum RC { rc_bad, rc_int, rc_float, rc_predicate, rc_stack }; static enum RC rc_class(OptoReg::Name reg) { @@ -1870,20 +2100,25 @@ static enum RC rc_class(OptoReg::Name reg) { return rc_bad; } - // we have 30 int registers * 2 halves - // (rscratch1 and rscratch2 are omitted) - int slots_of_int_registers = RegisterImpl::max_slots_per_register * (RegisterImpl::number_of_registers - 2); + // we have 32 int registers * 2 halves + int slots_of_int_registers = RegisterImpl::max_slots_per_register * RegisterImpl::number_of_registers; if (reg < slots_of_int_registers) { return rc_int; } - // we have 32 float register * 4 halves - if (reg < slots_of_int_registers + FloatRegisterImpl::max_slots_per_register * FloatRegisterImpl::number_of_registers) { + // we have 32 float register * 8 halves + int slots_of_float_registers = FloatRegisterImpl::max_slots_per_register * FloatRegisterImpl::number_of_registers; + if (reg < slots_of_int_registers + slots_of_float_registers) { return rc_float; } - // Between float regs & stack is the flags regs. + int slots_of_predicate_registers = PRegisterImpl::max_slots_per_register * PRegisterImpl::number_of_registers; + if (reg < slots_of_int_registers + slots_of_float_registers + slots_of_predicate_registers) { + return rc_predicate; + } + + // Between predicate regs & stack is the flags. assert(OptoReg::is_stack(reg), "blow up if spilling flags"); return rc_stack; @@ -1941,12 +2176,12 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo as_FloatRegister(Matcher::_regEncode[src_lo])); } else if (src_lo_rc == rc_float && dst_lo_rc == rc_stack) { __ spill(as_FloatRegister(Matcher::_regEncode[src_lo]), - ireg == Op_VecD ? __ D : __ Q, - ra_->reg2offset(dst_lo)); + ireg == Op_VecD ? __ D : __ Q, + ra_->reg2offset(dst_lo)); } else if (src_lo_rc == rc_stack && dst_lo_rc == rc_float) { __ unspill(as_FloatRegister(Matcher::_regEncode[dst_lo]), - ireg == Op_VecD ? __ D : __ Q, - ra_->reg2offset(src_lo)); + ireg == Op_VecD ? __ D : __ Q, + ra_->reg2offset(src_lo)); } else { ShouldNotReachHere(); } @@ -2031,9 +2266,24 @@ uint MachSpillCopyNode::implementation(CodeBuffer *cbuf, PhaseRegAlloc *ra_, boo st->print("%s", Matcher::regName[dst_lo]); } if (bottom_type()->isa_vect() != NULL) { - st->print("\t# vector spill size = %d", ideal_reg()==Op_VecD ? 64:128); + int vsize = 0; + switch (ideal_reg()) { + case Op_VecD: + vsize = 64; + break; + case Op_VecX: + vsize = 128; + break; + case Op_VecA: + vsize = Matcher::scalable_vector_reg_size(T_BYTE) * 8; + break; + default: + assert(false, "bad register type for spill"); + ShouldNotReachHere(); + } + st->print("\t# vector spill size = %d", vsize); } else { - st->print("\t# spill size = %d", is64 ? 64:32); + st->print("\t# spill size = %d", is64 ? 64 : 32); } } @@ -2204,7 +2454,7 @@ const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { } const bool Matcher::has_predicated_vectors(void) { - return false; + return UseSVE > 0; } const int Matcher::float_pressure(int default_pressure_threshold) { @@ -2240,7 +2490,8 @@ const bool Matcher::convL2FSupported(void) { // Vector width in bytes. const int Matcher::vector_width_in_bytes(BasicType bt) { - int size = MIN2(16,(int)MaxVectorSize); + // The MaxVectorSize should have been set by detecting SVE max vector register size. + int size = MIN2((UseSVE > 0) ? 256 : 16, (int)MaxVectorSize); // Minimum 2 values in vector if (size < 2*type2aelembytes(bt)) size = 0; // But never < 4 @@ -2253,14 +2504,32 @@ const int Matcher::max_vector_size(const BasicType bt) { return vector_width_in_bytes(bt)/type2aelembytes(bt); } const int Matcher::min_vector_size(const BasicType bt) { -// For the moment limit the vector size to 8 bytes + int max_size = max_vector_size(bt); + if ((UseSVE > 0) && (MaxVectorSize >= 16)) { + // Currently vector length less than SVE vector register size is not supported. + return max_size; + } else { + // For the moment limit the vector size to 8 bytes with NEON. int size = 8 / type2aelembytes(bt); if (size < 2) size = 2; return size; + } +} + +const bool Matcher::supports_scalable_vector() { + return UseSVE > 0; +} + +// Actual max scalable vector register length. +const int Matcher::scalable_vector_reg_size(const BasicType bt) { + return Matcher::max_vector_size(bt); } // Vector ideal reg. const uint Matcher::vector_ideal_reg(int len) { + if (UseSVE > 0 && 16 <= len && len <= 256) { + return Op_VecA; + } switch(len) { case 8: return Op_VecD; case 16: return Op_VecX; @@ -2270,6 +2539,9 @@ const uint Matcher::vector_ideal_reg(int len) { } const uint Matcher::vector_shift_count_ideal_reg(int size) { + if (UseSVE > 0 && 16 <= size && size <= 256) { + return Op_VecA; + } switch(size) { case 8: return Op_VecD; case 16: return Op_VecX; @@ -4851,6 +5123,18 @@ operand vRegD() interface(REG_INTER); %} +// Generic vector class. This will be used for +// all vector operands, including NEON and SVE, +// but currently only used for SVE VecA. +operand vReg() +%{ + constraint(ALLOC_IN_RC(vectora_reg)); + match(VecA); + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + operand vecD() %{ constraint(ALLOC_IN_RC(vectord_reg)); @@ -5159,6 +5443,15 @@ operand vRegD_V31() interface(REG_INTER); %} +operand pRegGov() +%{ + constraint(ALLOC_IN_RC(gov_pr)); + match(RegVMask); + op_cost(0); + format %{ %} + interface(REG_INTER); +%} + // Flags register, used as output of signed compare instructions // note that on AArch64 we also use this register as the output for diff --git a/src/hotspot/cpu/aarch64/assembler_aarch64.hpp b/src/hotspot/cpu/aarch64/assembler_aarch64.hpp index 228b82660..6f4e75ff4 100644 --- a/src/hotspot/cpu/aarch64/assembler_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/assembler_aarch64.hpp @@ -273,6 +273,14 @@ public: f(r->encoding_nocheck(), lsb + 4, lsb); } + void prf(PRegister r, int lsb) { + f(r->encoding_nocheck(), lsb + 3, lsb); + } + + void pgrf(PRegister r, int lsb) { + f(r->encoding_nocheck(), lsb + 2, lsb); + } + unsigned get(int msb = 31, int lsb = 0) { int nbits = msb - lsb + 1; unsigned mask = checked_cast<unsigned>(right_n_bits(nbits)) << lsb; @@ -669,6 +677,12 @@ public: void rf(FloatRegister reg, int lsb) { current->rf(reg, lsb); } + void prf(PRegister reg, int lsb) { + current->prf(reg, lsb); + } + void pgrf(PRegister reg, int lsb) { + current->pgrf(reg, lsb); + } void fixed(unsigned value, unsigned mask) { current->fixed(value, mask); } diff --git a/src/hotspot/cpu/aarch64/register_aarch64.cpp b/src/hotspot/cpu/aarch64/register_aarch64.cpp index 36cbe3fee..3db8e8337 100644 --- a/src/hotspot/cpu/aarch64/register_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/register_aarch64.cpp @@ -1,6 +1,6 @@ /* - * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2000, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -33,6 +33,9 @@ const int ConcreteRegisterImpl::max_fpr = ConcreteRegisterImpl::max_gpr + FloatRegisterImpl::number_of_registers * FloatRegisterImpl::max_slots_per_register; +const int ConcreteRegisterImpl::max_pr + = ConcreteRegisterImpl::max_fpr + PRegisterImpl::number_of_registers; + const char* RegisterImpl::name() const { const char* names[number_of_registers] = { "c_rarg0", "c_rarg1", "c_rarg2", "c_rarg3", "c_rarg4", "c_rarg5", "c_rarg6", "c_rarg7", @@ -54,3 +57,10 @@ const char* FloatRegisterImpl::name() const { }; return is_valid() ? names[encoding()] : "noreg"; } + +const char* PRegisterImpl::name() const { + const char* names[number_of_registers] = { + "p0", "p1", "p2", "p3", "p4", "p5", "p6", "p7" + }; + return is_valid() ? names[encoding()] : "noreg"; +} diff --git a/src/hotspot/cpu/aarch64/register_aarch64.hpp b/src/hotspot/cpu/aarch64/register_aarch64.hpp index 20f549188..e7a9cee92 100644 --- a/src/hotspot/cpu/aarch64/register_aarch64.hpp +++ b/src/hotspot/cpu/aarch64/register_aarch64.hpp @@ -140,9 +140,10 @@ class FloatRegisterImpl: public AbstractRegisterImpl { public: enum { number_of_registers = 32, - max_slots_per_register = 4, + max_slots_per_register = 8, save_slots_per_register = 2, - extra_save_slots_per_register = max_slots_per_register - save_slots_per_register + slots_per_neon_register = 4, + extra_save_slots_per_neon_register = slots_per_neon_register - save_slots_per_register }; // construction @@ -198,6 +199,79 @@ CONSTANT_REGISTER_DECLARATION(FloatRegister, v29 , (29)); CONSTANT_REGISTER_DECLARATION(FloatRegister, v30 , (30)); CONSTANT_REGISTER_DECLARATION(FloatRegister, v31 , (31)); +// SVE vector registers, shared with the SIMD&FP v0-v31. Vn maps to Zn[127:0]. +CONSTANT_REGISTER_DECLARATION(FloatRegister, z0 , ( 0)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z1 , ( 1)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z2 , ( 2)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z3 , ( 3)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z4 , ( 4)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z5 , ( 5)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z6 , ( 6)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z7 , ( 7)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z8 , ( 8)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z9 , ( 9)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z10 , (10)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z11 , (11)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z12 , (12)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z13 , (13)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z14 , (14)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z15 , (15)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z16 , (16)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z17 , (17)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z18 , (18)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z19 , (19)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z20 , (20)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z21 , (21)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z22 , (22)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z23 , (23)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z24 , (24)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z25 , (25)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z26 , (26)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z27 , (27)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z28 , (28)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z29 , (29)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z30 , (30)); +CONSTANT_REGISTER_DECLARATION(FloatRegister, z31 , (31)); + +class PRegisterImpl; +typedef PRegisterImpl* PRegister; +inline PRegister as_PRegister(int encoding) { + return (PRegister)(intptr_t)encoding; +} + +// The implementation of predicate registers for the architecture +class PRegisterImpl: public AbstractRegisterImpl { + public: + enum { + number_of_registers = 8, + max_slots_per_register = 1 + }; + + // construction + inline friend PRegister as_PRegister(int encoding); + + VMReg as_VMReg(); + + // derived registers, offsets, and addresses + PRegister successor() const { return as_PRegister(encoding() + 1); } + + // accessors + int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; } + int encoding_nocheck() const { return (intptr_t)this; } + bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } + const char* name() const; +}; + +// The predicate registers of SVE. +CONSTANT_REGISTER_DECLARATION(PRegister, p0, ( 0)); +CONSTANT_REGISTER_DECLARATION(PRegister, p1, ( 1)); +CONSTANT_REGISTER_DECLARATION(PRegister, p2, ( 2)); +CONSTANT_REGISTER_DECLARATION(PRegister, p3, ( 3)); +CONSTANT_REGISTER_DECLARATION(PRegister, p4, ( 4)); +CONSTANT_REGISTER_DECLARATION(PRegister, p5, ( 5)); +CONSTANT_REGISTER_DECLARATION(PRegister, p6, ( 6)); +CONSTANT_REGISTER_DECLARATION(PRegister, p7, ( 7)); + // Need to know the total number of registers of all sorts for SharedInfo. // Define a class that exports it. class ConcreteRegisterImpl : public AbstractRegisterImpl { @@ -210,12 +284,14 @@ class ConcreteRegisterImpl : public AbstractRegisterImpl { number_of_registers = (RegisterImpl::max_slots_per_register * RegisterImpl::number_of_registers + FloatRegisterImpl::max_slots_per_register * FloatRegisterImpl::number_of_registers + + PRegisterImpl::max_slots_per_register * PRegisterImpl::number_of_registers + 1) // flags }; // added to make it compile static const int max_gpr; static const int max_fpr; + static const int max_pr; }; class RegSetIterator; diff --git a/src/hotspot/cpu/aarch64/register_definitions_aarch64.cpp b/src/hotspot/cpu/aarch64/register_definitions_aarch64.cpp index c18109087..15131ed32 100644 --- a/src/hotspot/cpu/aarch64/register_definitions_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/register_definitions_aarch64.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2002, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2002, 2020, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2014, Red Hat Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * @@ -154,3 +154,45 @@ REGISTER_DEFINITION(Register, rthread); REGISTER_DEFINITION(Register, rheapbase); REGISTER_DEFINITION(Register, r31_sp); + +REGISTER_DEFINITION(FloatRegister, z0); +REGISTER_DEFINITION(FloatRegister, z1); +REGISTER_DEFINITION(FloatRegister, z2); +REGISTER_DEFINITION(FloatRegister, z3); +REGISTER_DEFINITION(FloatRegister, z4); +REGISTER_DEFINITION(FloatRegister, z5); +REGISTER_DEFINITION(FloatRegister, z6); +REGISTER_DEFINITION(FloatRegister, z7); +REGISTER_DEFINITION(FloatRegister, z8); +REGISTER_DEFINITION(FloatRegister, z9); +REGISTER_DEFINITION(FloatRegister, z10); +REGISTER_DEFINITION(FloatRegister, z11); +REGISTER_DEFINITION(FloatRegister, z12); +REGISTER_DEFINITION(FloatRegister, z13); +REGISTER_DEFINITION(FloatRegister, z14); +REGISTER_DEFINITION(FloatRegister, z15); +REGISTER_DEFINITION(FloatRegister, z16); +REGISTER_DEFINITION(FloatRegister, z17); +REGISTER_DEFINITION(FloatRegister, z18); +REGISTER_DEFINITION(FloatRegister, z19); +REGISTER_DEFINITION(FloatRegister, z20); +REGISTER_DEFINITION(FloatRegister, z21); +REGISTER_DEFINITION(FloatRegister, z22); +REGISTER_DEFINITION(FloatRegister, z23); +REGISTER_DEFINITION(FloatRegister, z24); +REGISTER_DEFINITION(FloatRegister, z25); +REGISTER_DEFINITION(FloatRegister, z26); +REGISTER_DEFINITION(FloatRegister, z27); +REGISTER_DEFINITION(FloatRegister, z28); +REGISTER_DEFINITION(FloatRegister, z29); +REGISTER_DEFINITION(FloatRegister, z30); +REGISTER_DEFINITION(FloatRegister, z31); + +REGISTER_DEFINITION(PRegister, p0); +REGISTER_DEFINITION(PRegister, p1); +REGISTER_DEFINITION(PRegister, p2); +REGISTER_DEFINITION(PRegister, p3); +REGISTER_DEFINITION(PRegister, p4); +REGISTER_DEFINITION(PRegister, p5); +REGISTER_DEFINITION(PRegister, p6); +REGISTER_DEFINITION(PRegister, p7); diff --git a/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp b/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp index dbad48582..3bf7284a7 100644 --- a/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/sharedRuntime_aarch64.cpp @@ -111,11 +111,28 @@ class RegisterSaver { }; OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors) { + bool use_sve = false; + int sve_vector_size_in_bytes = 0; + int sve_vector_size_in_slots = 0; + +#ifdef COMPILER2 + use_sve = Matcher::supports_scalable_vector(); + sve_vector_size_in_bytes = Matcher::scalable_vector_reg_size(T_BYTE); + sve_vector_size_in_slots = Matcher::scalable_vector_reg_size(T_FLOAT); +#endif + #if COMPILER2_OR_JVMCI if (save_vectors) { + int vect_words = 0; + int extra_save_slots_per_register = 0; // Save upper half of vector registers - int vect_words = FloatRegisterImpl::number_of_registers * FloatRegisterImpl::extra_save_slots_per_register / - VMRegImpl::slots_per_word; + if (use_sve) { + extra_save_slots_per_register = sve_vector_size_in_slots - FloatRegisterImpl::save_slots_per_register; + } else { + extra_save_slots_per_register = FloatRegisterImpl::extra_save_slots_per_neon_register; + } + vect_words = FloatRegisterImpl::number_of_registers * extra_save_slots_per_register / + VMRegImpl::slots_per_word; additional_frame_words += vect_words; } #else @@ -158,8 +175,13 @@ OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_ for (int i = 0; i < FloatRegisterImpl::number_of_registers; i++) { FloatRegister r = as_FloatRegister(i); - int sp_offset = save_vectors ? (FloatRegisterImpl::max_slots_per_register * i) : - (FloatRegisterImpl::save_slots_per_register * i); + int sp_offset = 0; + if (save_vectors) { + sp_offset = use_sve ? (sve_vector_size_in_slots * i) : + (FloatRegisterImpl::slots_per_neon_register * i); + } else { + sp_offset = FloatRegisterImpl::save_slots_per_register * i; + } oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset), r->as_VMReg()); } diff --git a/src/hotspot/cpu/aarch64/vmreg_aarch64.inline.hpp b/src/hotspot/cpu/aarch64/vmreg_aarch64.inline.hpp index c5d4383b4..dde7a7a91 100644 --- a/src/hotspot/cpu/aarch64/vmreg_aarch64.inline.hpp +++ b/src/hotspot/cpu/aarch64/vmreg_aarch64.inline.hpp @@ -1,6 +1,6 @@ /* - * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2014, Red Hat Inc. All rights reserved. + * Copyright (c) 2006, 2020, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -36,4 +36,8 @@ inline VMReg FloatRegisterImpl::as_VMReg() { ConcreteRegisterImpl::max_gpr); } +inline VMReg PRegisterImpl::as_VMReg() { + return VMRegImpl::as_VMReg(encoding() + ConcreteRegisterImpl::max_fpr); +} + #endif // CPU_AARCH64_VM_VMREG_AARCH64_INLINE_HPP diff --git a/src/hotspot/cpu/arm/arm.ad b/src/hotspot/cpu/arm/arm.ad index 4a32af54e..0ef4d7f3e 100644 --- a/src/hotspot/cpu/arm/arm.ad +++ b/src/hotspot/cpu/arm/arm.ad @@ -1121,6 +1121,14 @@ const int Matcher::vector_width_in_bytes(BasicType bt) { return MaxVectorSize; } +const bool Matcher::supports_scalable_vector() { + return false; +} + +const int Matcher::scalable_vector_reg_size(const BasicType bt) { + return -1; +} + // Vector ideal reg corresponding to specified size in bytes const uint Matcher::vector_ideal_reg(int size) { assert(MaxVectorSize >= size, ""); diff --git a/src/hotspot/cpu/ppc/ppc.ad b/src/hotspot/cpu/ppc/ppc.ad index 36cbada53..7ee16a975 100644 --- a/src/hotspot/cpu/ppc/ppc.ad +++ b/src/hotspot/cpu/ppc/ppc.ad @@ -2310,6 +2310,14 @@ const int Matcher::min_vector_size(const BasicType bt) { return max_vector_size(bt); // Same as max. } +const bool Matcher::supports_scalable_vector() { + return false; +} + +const int Matcher::scalable_vector_reg_size(const BasicType bt) { + return -1; +} + // PPC implementation uses VSX load/store instructions (if // SuperwordUseVSX) which support 4 byte but not arbitrary alignment const bool Matcher::misaligned_vectors_ok() { diff --git a/src/hotspot/cpu/s390/s390.ad b/src/hotspot/cpu/s390/s390.ad index 96c231b0a..ea09aaafc 100644 --- a/src/hotspot/cpu/s390/s390.ad +++ b/src/hotspot/cpu/s390/s390.ad @@ -1573,6 +1573,14 @@ const int Matcher::min_vector_size(const BasicType bt) { return max_vector_size(bt); // Same as max. } +const bool Matcher::supports_scalable_vector() { + return false; +} + +const int Matcher::scalable_vector_reg_size(const BasicType bt) { + return -1; +} + const uint Matcher::vector_shift_count_ideal_reg(int size) { fatal("vector shift is not supported"); return Node::NotAMachineReg; diff --git a/src/hotspot/cpu/x86/x86.ad b/src/hotspot/cpu/x86/x86.ad index abdd7483d..76dd6addd 100644 --- a/src/hotspot/cpu/x86/x86.ad +++ b/src/hotspot/cpu/x86/x86.ad @@ -1,5 +1,5 @@ // -// Copyright (c) 2011, 2019, Oracle and/or its affiliates. All rights reserved. +// Copyright (c) 2011, 2020, Oracle and/or its affiliates. All rights reserved. // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. // // This code is free software; you can redistribute it and/or modify it @@ -1485,6 +1485,14 @@ const int Matcher::min_vector_size(const BasicType bt) { return MIN2(size,max_size); } +const bool Matcher::supports_scalable_vector() { + return false; +} + +const int Matcher::scalable_vector_reg_size(const BasicType bt) { + return -1; +} + // Vector ideal reg corresponding to specified size in bytes const uint Matcher::vector_ideal_reg(int size) { assert(MaxVectorSize >= size, ""); diff --git a/src/hotspot/cpu/x86/x86_64.ad b/src/hotspot/cpu/x86/x86_64.ad index 8904bba97..e09cdd061 100644 --- a/src/hotspot/cpu/x86/x86_64.ad +++ b/src/hotspot/cpu/x86/x86_64.ad @@ -2968,7 +2968,7 @@ frame RAX_H_num // Op_RegL }; // Excluded flags and vector registers. - assert(ARRAY_SIZE(hi) == _last_machine_leaf - 6, "missing type"); + assert(ARRAY_SIZE(hi) == _last_machine_leaf - 8, "missing type"); return OptoRegPair(hi[ideal_reg], lo[ideal_reg]); %} %} diff --git a/src/hotspot/share/adlc/archDesc.cpp b/src/hotspot/share/adlc/archDesc.cpp index ba61aa4c0..9e41b2dc6 100644 --- a/src/hotspot/share/adlc/archDesc.cpp +++ b/src/hotspot/share/adlc/archDesc.cpp @@ -1,5 +1,5 @@ // -// Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. +// Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. // DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. // // This code is free software; you can redistribute it and/or modify it @@ -929,6 +929,7 @@ const char *ArchDesc::getIdealType(const char *idealOp) { // Match Vector types. if (strncmp(idealOp, "Vec",3)==0) { switch(last_char) { + case 'A': return "TypeVect::VECTA"; case 'S': return "TypeVect::VECTS"; case 'D': return "TypeVect::VECTD"; case 'X': return "TypeVect::VECTX"; @@ -939,6 +940,10 @@ const char *ArchDesc::getIdealType(const char *idealOp) { } } + if (strncmp(idealOp, "RegVMask", 8) == 0) { + return "Type::BOTTOM"; + } + // !!!!! switch(last_char) { case 'I': return "TypeInt::INT"; diff --git a/src/hotspot/share/adlc/formssel.cpp b/src/hotspot/share/adlc/formssel.cpp index c7b855a7e..a37866824 100644 --- a/src/hotspot/share/adlc/formssel.cpp +++ b/src/hotspot/share/adlc/formssel.cpp @@ -3963,6 +3963,8 @@ bool MatchRule::is_base_register(FormDict &globals) const { strcmp(opType,"RegL")==0 || strcmp(opType,"RegF")==0 || strcmp(opType,"RegD")==0 || + strcmp(opType,"RegVMask")==0 || + strcmp(opType,"VecA")==0 || strcmp(opType,"VecS")==0 || strcmp(opType,"VecD")==0 || strcmp(opType,"VecX")==0 || diff --git a/src/hotspot/share/opto/chaitin.cpp b/src/hotspot/share/opto/chaitin.cpp index 914dc43f6..710af9de8 100644 --- a/src/hotspot/share/opto/chaitin.cpp +++ b/src/hotspot/share/opto/chaitin.cpp @@ -77,6 +77,7 @@ void LRG::dump() const { if( _is_oop ) tty->print("Oop "); if( _is_float ) tty->print("Float "); if( _is_vector ) tty->print("Vector "); + if( _is_scalable ) tty->print("Scalable "); if( _was_spilled1 ) tty->print("Spilled "); if( _was_spilled2 ) tty->print("Spilled2 "); if( _direct_conflict ) tty->print("Direct_conflict "); @@ -646,7 +647,15 @@ void PhaseChaitin::Register_Allocate() { // Live ranges record the highest register in their mask. // We want the low register for the AD file writer's convenience. OptoReg::Name hi = lrg.reg(); // Get hi register - OptoReg::Name lo = OptoReg::add(hi, (1-lrg.num_regs())); // Find lo + int num_regs = lrg.num_regs(); + if (lrg.is_scalable() && OptoReg::is_stack(hi)) { + // For scalable vector registers, when they are allocated in physical + // registers, num_regs is RegMask::SlotsPerVecA for reg mask of scalable + // vector. If they are allocated on stack, we need to get the actual + // num_regs, which reflects the physical length of scalable registers. + num_regs = lrg.scalable_reg_slots(); + } + OptoReg::Name lo = OptoReg::add(hi, (1-num_regs)); // Find lo // We have to use pair [lo,lo+1] even for wide vectors because // the rest of code generation works only with pairs. It is safe // since for registers encoding only 'lo' is used. @@ -801,8 +810,19 @@ void PhaseChaitin::gather_lrg_masks( bool after_aggressive ) { // Check for vector live range (only if vector register is used). // On SPARC vector uses RegD which could be misaligned so it is not // processes as vector in RA. - if (RegMask::is_vector(ireg)) + if (RegMask::is_vector(ireg)) { lrg._is_vector = 1; + if (ireg == Op_VecA) { + assert(Matcher::supports_scalable_vector(), "scalable vector should be supported"); + lrg._is_scalable = 1; + // For scalable vector, when it is allocated in physical register, + // num_regs is RegMask::SlotsPerVecA for reg mask, + // which may not be the actual physical register size. + // If it is allocated in stack, we need to get the actual + // physical length of scalable vector register. + lrg.set_scalable_reg_slots(Matcher::scalable_vector_reg_size(T_FLOAT)); + } + } assert(n_type->isa_vect() == NULL || lrg._is_vector || ireg == Op_RegD || ireg == Op_RegL, "vector must be in vector registers"); @@ -912,6 +932,13 @@ void PhaseChaitin::gather_lrg_masks( bool after_aggressive ) { lrg.set_reg_pressure(1); #endif break; + case Op_VecA: + assert(Matcher::supports_scalable_vector(), "does not support scalable vector"); + assert(RegMask::num_registers(Op_VecA) == RegMask::SlotsPerVecA, "sanity"); + assert(lrgmask.is_aligned_sets(RegMask::SlotsPerVecA), "vector should be aligned"); + lrg.set_num_regs(RegMask::SlotsPerVecA); + lrg.set_reg_pressure(1); + break; case Op_VecS: assert(Matcher::vector_size_supported(T_BYTE,4), "sanity"); assert(RegMask::num_registers(Op_VecS) == RegMask::SlotsPerVecS, "sanity"); @@ -1358,6 +1385,46 @@ static bool is_legal_reg(LRG &lrg, OptoReg::Name reg, int chunk) { return false; } +static OptoReg::Name find_first_set(LRG &lrg, RegMask mask, int chunk) { + int num_regs = lrg.num_regs(); + OptoReg::Name assigned = mask.find_first_set(lrg, num_regs); + + if (lrg.is_scalable()) { + // a physical register is found + if (chunk == 0 && OptoReg::is_reg(assigned)) { + return assigned; + } + + // find available stack slots for scalable register + if (lrg._is_vector) { + num_regs = lrg.scalable_reg_slots(); + // if actual scalable vector register is exactly SlotsPerVecA * 32 bits + if (num_regs == RegMask::SlotsPerVecA) { + return assigned; + } + + // mask has been cleared out by clear_to_sets(SlotsPerVecA) before choose_color, but it + // does not work for scalable size. We have to find adjacent scalable_reg_slots() bits + // instead of SlotsPerVecA bits. + assigned = mask.find_first_set(lrg, num_regs); // find highest valid reg + while (OptoReg::is_valid(assigned) && RegMask::can_represent(assigned)) { + // Verify the found reg has scalable_reg_slots() bits set. + if (mask.is_valid_reg(assigned, num_regs)) { + return assigned; + } else { + // Remove more for each iteration + mask.Remove(assigned - num_regs + 1); // Unmask the lowest reg + mask.clear_to_sets(RegMask::SlotsPerVecA); // Align by SlotsPerVecA bits + assigned = mask.find_first_set(lrg, num_regs); + } + } + return OptoReg::Bad; // will cause chunk change, and retry next chunk + } + } + + return assigned; +} + // Choose a color using the biasing heuristic OptoReg::Name PhaseChaitin::bias_color( LRG &lrg, int chunk ) { @@ -1391,7 +1458,7 @@ OptoReg::Name PhaseChaitin::bias_color( LRG &lrg, int chunk ) { RegMask tempmask = lrg.mask(); tempmask.AND(lrgs(copy_lrg).mask()); tempmask.clear_to_sets(lrg.num_regs()); - OptoReg::Name reg = tempmask.find_first_set(lrg.num_regs()); + OptoReg::Name reg = find_first_set(lrg, tempmask, chunk); if (OptoReg::is_valid(reg)) return reg; } @@ -1400,7 +1467,7 @@ OptoReg::Name PhaseChaitin::bias_color( LRG &lrg, int chunk ) { // If no bias info exists, just go with the register selection ordering if (lrg._is_vector || lrg.num_regs() == 2) { // Find an aligned set - return OptoReg::add(lrg.mask().find_first_set(lrg.num_regs()),chunk); + return OptoReg::add(find_first_set(lrg, lrg.mask(), chunk), chunk); } // CNC - Fun hack. Alternate 1st and 2nd selection. Enables post-allocate @@ -1455,7 +1522,6 @@ uint PhaseChaitin::Select( ) { LRG *lrg = &lrgs(lidx); _simplified = lrg->_next; - #ifndef PRODUCT if (trace_spilling()) { ttyLocker ttyl; @@ -1539,7 +1605,6 @@ uint PhaseChaitin::Select( ) { // Bump register mask up to next stack chunk chunk += RegMask::CHUNK_SIZE; lrg->Set_All(); - goto retry_next_chunk; } @@ -1564,12 +1629,21 @@ uint PhaseChaitin::Select( ) { int n_regs = lrg->num_regs(); assert(!lrg->_is_vector || !lrg->_fat_proj, "sanity"); if (n_regs == 1 || !lrg->_fat_proj) { - assert(!lrg->_is_vector || n_regs <= RegMask::SlotsPerVecZ, "sanity"); + if (Matcher::supports_scalable_vector()) { + assert(!lrg->_is_vector || n_regs <= RegMask::SlotsPerVecA, "sanity"); + } else { + assert(!lrg->_is_vector || n_regs <= RegMask::SlotsPerVecZ, "sanity"); + } lrg->Clear(); // Clear the mask lrg->Insert(reg); // Set regmask to match selected reg // For vectors and pairs, also insert the low bit of the pair - for (int i = 1; i < n_regs; i++) + // We always choose the high bit, then mask the low bits by register size + if (lrg->is_scalable() && OptoReg::is_stack(lrg->reg())) { // stack + n_regs = lrg->scalable_reg_slots(); + } + for (int i = 1; i < n_regs; i++) { lrg->Insert(OptoReg::add(reg,-i)); + } lrg->set_mask_size(n_regs); } else { // Else fatproj // mask must be equal to fatproj bits, by definition diff --git a/src/hotspot/share/opto/chaitin.hpp b/src/hotspot/share/opto/chaitin.hpp index e5be5b966..5408a24ef 100644 --- a/src/hotspot/share/opto/chaitin.hpp +++ b/src/hotspot/share/opto/chaitin.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -115,7 +115,9 @@ public: _msize_valid=1; if (_is_vector) { assert(!_fat_proj, "sanity"); - _mask.verify_sets(_num_regs); + if (!(_is_scalable && OptoReg::is_stack(_reg))) { + _mask.verify_sets(_num_regs); + } } else if (_num_regs == 2 && !_fat_proj) { _mask.verify_pairs(); } @@ -139,14 +141,37 @@ public: void clear_to_pairs() { _mask.clear_to_pairs(); debug_only(_msize_valid=0;) } void clear_to_sets() { _mask.clear_to_sets(_num_regs); debug_only(_msize_valid=0;) } - // Number of registers this live range uses when it colors private: + // Number of registers this live range uses when it colors uint16_t _num_regs; // 2 for Longs and Doubles, 1 for all else // except _num_regs is kill count for fat_proj + + // For scalable register, num_regs may not be the actual physical register size. + // We need to get the actual physical length of scalable register when scalable + // register is spilled. The size of one slot is 32-bit. + uint _scalable_reg_slots; // Actual scalable register length of slots. + // Meaningful only when _is_scalable is true. public: int num_regs() const { return _num_regs; } void set_num_regs( int reg ) { assert( _num_regs == reg || !_num_regs, "" ); _num_regs = reg; } + uint scalable_reg_slots() { return _scalable_reg_slots; } + void set_scalable_reg_slots(uint slots) { + assert(_is_scalable, "scalable register"); + assert(slots > 0, "slots of scalable register is not valid"); + _scalable_reg_slots = slots; + } + + bool is_scalable() { +#ifdef ASSERT + if (_is_scalable) { + // Should only be a vector for now, but it could also be a RegVMask in future. + assert(_is_vector && (_num_regs == RegMask::SlotsPerVecA), "unexpected scalable reg"); + } +#endif + return _is_scalable; + } + private: // Number of physical registers this live range uses when it colors // Architecture and register-set dependent @@ -172,6 +197,8 @@ public: uint _is_oop:1, // Live-range holds an oop _is_float:1, // True if in float registers _is_vector:1, // True if in vector registers + _is_scalable:1, // True if register size is scalable + // e.g. Arm SVE vector/predicate registers. _was_spilled1:1, // True if prior spilling on def _was_spilled2:1, // True if twice prior spilling on def _is_bound:1, // live range starts life with no diff --git a/src/hotspot/share/opto/matcher.cpp b/src/hotspot/share/opto/matcher.cpp index 4cc7580a8..4fb732161 100644 --- a/src/hotspot/share/opto/matcher.cpp +++ b/src/hotspot/share/opto/matcher.cpp @@ -84,6 +84,7 @@ Matcher::Matcher() idealreg2spillmask [Op_RegF] = NULL; idealreg2spillmask [Op_RegD] = NULL; idealreg2spillmask [Op_RegP] = NULL; + idealreg2spillmask [Op_VecA] = NULL; idealreg2spillmask [Op_VecS] = NULL; idealreg2spillmask [Op_VecD] = NULL; idealreg2spillmask [Op_VecX] = NULL; @@ -97,6 +98,7 @@ Matcher::Matcher() idealreg2debugmask [Op_RegF] = NULL; idealreg2debugmask [Op_RegD] = NULL; idealreg2debugmask [Op_RegP] = NULL; + idealreg2debugmask [Op_VecA] = NULL; idealreg2debugmask [Op_VecS] = NULL; idealreg2debugmask [Op_VecD] = NULL; idealreg2debugmask [Op_VecX] = NULL; @@ -110,6 +112,7 @@ Matcher::Matcher() idealreg2mhdebugmask[Op_RegF] = NULL; idealreg2mhdebugmask[Op_RegD] = NULL; idealreg2mhdebugmask[Op_RegP] = NULL; + idealreg2mhdebugmask[Op_VecA] = NULL; idealreg2mhdebugmask[Op_VecS] = NULL; idealreg2mhdebugmask[Op_VecD] = NULL; idealreg2mhdebugmask[Op_VecX] = NULL; @@ -417,6 +420,8 @@ static RegMask *init_input_masks( uint size, RegMask &ret_adr, RegMask &fp ) { return rms; } +#define NOF_STACK_MASKS (3*6+6) + //---------------------------init_first_stack_mask----------------------------- // Create the initial stack mask used by values spilling to the stack. // Disallow any debug info in outgoing argument areas by setting the @@ -424,7 +429,12 @@ static RegMask *init_input_masks( uint size, RegMask &ret_adr, RegMask &fp ) { void Matcher::init_first_stack_mask() { // Allocate storage for spill masks as masks for the appropriate load type. - RegMask *rms = (RegMask*)C->comp_arena()->Amalloc_D(sizeof(RegMask) * (3*6+5)); + RegMask *rms = (RegMask*)C->comp_arena()->Amalloc_D(sizeof(RegMask) * NOF_STACK_MASKS); + + // Initialize empty placeholder masks into the newly allocated arena + for (int i = 0; i < NOF_STACK_MASKS; i++) { + new (rms + i) RegMask(); + } idealreg2spillmask [Op_RegN] = &rms[0]; idealreg2spillmask [Op_RegI] = &rms[1]; @@ -447,11 +457,12 @@ void Matcher::init_first_stack_mask() { idealreg2mhdebugmask[Op_RegD] = &rms[16]; idealreg2mhdebugmask[Op_RegP] = &rms[17]; - idealreg2spillmask [Op_VecS] = &rms[18]; - idealreg2spillmask [Op_VecD] = &rms[19]; - idealreg2spillmask [Op_VecX] = &rms[20]; - idealreg2spillmask [Op_VecY] = &rms[21]; - idealreg2spillmask [Op_VecZ] = &rms[22]; + idealreg2spillmask [Op_VecA] = &rms[18]; + idealreg2spillmask [Op_VecS] = &rms[19]; + idealreg2spillmask [Op_VecD] = &rms[20]; + idealreg2spillmask [Op_VecX] = &rms[21]; + idealreg2spillmask [Op_VecY] = &rms[22]; + idealreg2spillmask [Op_VecZ] = &rms[23]; OptoReg::Name i; @@ -478,6 +489,7 @@ void Matcher::init_first_stack_mask() { // Keep spill masks aligned. aligned_stack_mask.clear_to_pairs(); assert(aligned_stack_mask.is_AllStack(), "should be infinite stack"); + RegMask scalable_stack_mask = aligned_stack_mask; *idealreg2spillmask[Op_RegP] = *idealreg2regmask[Op_RegP]; #ifdef _LP64 @@ -548,28 +560,48 @@ void Matcher::init_first_stack_mask() { *idealreg2spillmask[Op_VecZ] = *idealreg2regmask[Op_VecZ]; idealreg2spillmask[Op_VecZ]->OR(aligned_stack_mask); } - if (UseFPUForSpilling) { - // This mask logic assumes that the spill operations are - // symmetric and that the registers involved are the same size. - // On sparc for instance we may have to use 64 bit moves will - // kill 2 registers when used with F0-F31. - idealreg2spillmask[Op_RegI]->OR(*idealreg2regmask[Op_RegF]); - idealreg2spillmask[Op_RegF]->OR(*idealreg2regmask[Op_RegI]); + + if (Matcher::supports_scalable_vector()) { + int k = 1; + OptoReg::Name in = OptoReg::add(_in_arg_limit, -1); + // Exclude last input arg stack slots to avoid spilling vector register there, + // otherwise vector spills could stomp over stack slots in caller frame. + for (; (in >= init_in) && (k < scalable_vector_reg_size(T_FLOAT)); k++) { + scalable_stack_mask.Remove(in); + in = OptoReg::add(in, -1); + } + + // For VecA + scalable_stack_mask.clear_to_sets(RegMask::SlotsPerVecA); + assert(scalable_stack_mask.is_AllStack(), "should be infinite stack"); + *idealreg2spillmask[Op_VecA] = *idealreg2regmask[Op_VecA]; + idealreg2spillmask[Op_VecA]->OR(scalable_stack_mask); + } else { + *idealreg2spillmask[Op_VecA] = RegMask::Empty; + } + + if (UseFPUForSpilling) { + // This mask logic assumes that the spill operations are + // symmetric and that the registers involved are the same size. + // On sparc for instance we may have to use 64 bit moves will + // kill 2 registers when used with F0-F31. + idealreg2spillmask[Op_RegI]->OR(*idealreg2regmask[Op_RegF]); + idealreg2spillmask[Op_RegF]->OR(*idealreg2regmask[Op_RegI]); #ifdef _LP64 - idealreg2spillmask[Op_RegN]->OR(*idealreg2regmask[Op_RegF]); - idealreg2spillmask[Op_RegL]->OR(*idealreg2regmask[Op_RegD]); - idealreg2spillmask[Op_RegD]->OR(*idealreg2regmask[Op_RegL]); - idealreg2spillmask[Op_RegP]->OR(*idealreg2regmask[Op_RegD]); + idealreg2spillmask[Op_RegN]->OR(*idealreg2regmask[Op_RegF]); + idealreg2spillmask[Op_RegL]->OR(*idealreg2regmask[Op_RegD]); + idealreg2spillmask[Op_RegD]->OR(*idealreg2regmask[Op_RegL]); + idealreg2spillmask[Op_RegP]->OR(*idealreg2regmask[Op_RegD]); #else - idealreg2spillmask[Op_RegP]->OR(*idealreg2regmask[Op_RegF]); + idealreg2spillmask[Op_RegP]->OR(*idealreg2regmask[Op_RegF]); #ifdef ARM - // ARM has support for moving 64bit values between a pair of - // integer registers and a double register - idealreg2spillmask[Op_RegL]->OR(*idealreg2regmask[Op_RegD]); - idealreg2spillmask[Op_RegD]->OR(*idealreg2regmask[Op_RegL]); + // ARM has support for moving 64bit values between a pair of + // integer registers and a double register + idealreg2spillmask[Op_RegL]->OR(*idealreg2regmask[Op_RegD]); + idealreg2spillmask[Op_RegD]->OR(*idealreg2regmask[Op_RegL]); #endif #endif - } + } // Make up debug masks. Any spill slot plus callee-save registers. // Caller-save registers are assumed to be trashable by the various @@ -872,6 +904,10 @@ void Matcher::init_spill_mask( Node *ret ) { idealreg2regmask[Op_RegP] = &spillP->out_RegMask(); // Vector regmasks. + if (Matcher::supports_scalable_vector()) { + MachNode *spillVectA = match_tree(new LoadVectorNode(NULL,mem,fp,atp,TypeVect::VECTA)); + idealreg2regmask[Op_VecA] = &spillVectA->out_RegMask(); + } if (Matcher::vector_size_supported(T_BYTE,4)) { TypeVect::VECTS = TypeVect::make(T_BYTE, 4); MachNode *spillVectS = match_tree(new LoadVectorNode(NULL,mem,fp,atp,TypeVect::VECTS)); @@ -1575,7 +1611,6 @@ Node* Matcher::Label_Root(const Node* n, State* svec, Node* control, Node*& mem) } } - // Call DFA to match this node, and return svec->DFA( n->Opcode(), n ); diff --git a/src/hotspot/share/opto/matcher.hpp b/src/hotspot/share/opto/matcher.hpp index 244e3d1f8..ed890f88e 100644 --- a/src/hotspot/share/opto/matcher.hpp +++ b/src/hotspot/share/opto/matcher.hpp @@ -333,6 +333,10 @@ public: Matcher::min_vector_size(bt) <= size); } + static const bool supports_scalable_vector(); + // Actual max scalable vector register length. + static const int scalable_vector_reg_size(const BasicType bt); + // Vector ideal reg static const uint vector_ideal_reg(int len); static const uint vector_shift_count_ideal_reg(int len); diff --git a/src/hotspot/share/opto/opcodes.cpp b/src/hotspot/share/opto/opcodes.cpp index e31e8d847..1a826d8ba 100644 --- a/src/hotspot/share/opto/opcodes.cpp +++ b/src/hotspot/share/opto/opcodes.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1998, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -38,12 +38,14 @@ const char *NodeClassNames[] = { "RegF", "RegD", "RegL", - "RegFlags", + "VecA", "VecS", "VecD", "VecX", "VecY", "VecZ", + "RegVMask", + "RegFlags", "_last_machine_leaf", #include "classes.hpp" "_last_class_name", diff --git a/src/hotspot/share/opto/opcodes.hpp b/src/hotspot/share/opto/opcodes.hpp index ae3d61ce0..ec96ba055 100644 --- a/src/hotspot/share/opto/opcodes.hpp +++ b/src/hotspot/share/opto/opcodes.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -37,11 +37,13 @@ enum Opcodes { macro(RegF) // Machine float register macro(RegD) // Machine double register macro(RegL) // Machine long register + macro(VecA) // Machine vectora register macro(VecS) // Machine vectors register macro(VecD) // Machine vectord register macro(VecX) // Machine vectorx register macro(VecY) // Machine vectory register macro(VecZ) // Machine vectorz register + macro(RegVMask) // Vector mask/predicate register macro(RegFlags) // Machine flags register _last_machine_leaf, // Split between regular opcodes and machine #include "classes.hpp" diff --git a/src/hotspot/share/opto/postaloc.cpp b/src/hotspot/share/opto/postaloc.cpp index d572ac9fe..3514b37bc 100644 --- a/src/hotspot/share/opto/postaloc.cpp +++ b/src/hotspot/share/opto/postaloc.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1998, 2016, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1998, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -266,9 +266,9 @@ int PhaseChaitin::elide_copy( Node *n, int k, Block *current_block, Node_List &v Node *val = skip_copies(n->in(k)); if (val == x) return blk_adjust; // No progress? - int n_regs = RegMask::num_registers(val->ideal_reg()); uint val_idx = _lrg_map.live_range_id(val); OptoReg::Name val_reg = lrgs(val_idx).reg(); + int n_regs = RegMask::num_registers(val->ideal_reg(), lrgs(val_idx)); // See if it happens to already be in the correct register! // (either Phi's direct register, or the common case of the name @@ -305,8 +305,26 @@ int PhaseChaitin::elide_copy( Node *n, int k, Block *current_block, Node_List &v } Node *vv = value[reg]; + // For scalable register, number of registers may be inconsistent between + // "val_reg" and "reg". For example, when "val" resides in register + // but "reg" is located in stack. + if (lrgs(val_idx).is_scalable()) { + assert(val->ideal_reg() == Op_VecA, "scalable vector register"); + if (OptoReg::is_stack(reg)) { + n_regs = lrgs(val_idx).scalable_reg_slots(); + } else { + n_regs = RegMask::SlotsPerVecA; + } + } if (n_regs > 1) { // Doubles and vectors check for aligned-adjacent set - uint last = (n_regs-1); // Looking for the last part of a set + uint last; + if (lrgs(val_idx).is_scalable()) { + assert(val->ideal_reg() == Op_VecA, "scalable vector register"); + // For scalable vector register, regmask is always SlotsPerVecA bits aligned + last = RegMask::SlotsPerVecA - 1; + } else { + last = (n_regs-1); // Looking for the last part of a set + } if ((reg&last) != last) continue; // Wrong part of a set if (!register_contains_value(vv, reg, n_regs, value)) continue; // Different value } @@ -591,7 +609,7 @@ void PhaseChaitin::post_allocate_copy_removal() { uint k; Node *phi = block->get_node(j); uint pidx = _lrg_map.live_range_id(phi); - OptoReg::Name preg = lrgs(_lrg_map.live_range_id(phi)).reg(); + OptoReg::Name preg = lrgs(pidx).reg(); // Remove copies remaining on edges. Check for junk phi. Node *u = NULL; @@ -619,7 +637,7 @@ void PhaseChaitin::post_allocate_copy_removal() { if( pidx ) { value.map(preg,phi); regnd.map(preg,phi); - int n_regs = RegMask::num_registers(phi->ideal_reg()); + int n_regs = RegMask::num_registers(phi->ideal_reg(), lrgs(pidx)); for (int l = 1; l < n_regs; l++) { OptoReg::Name preg_lo = OptoReg::add(preg,-l); value.map(preg_lo,phi); @@ -663,7 +681,7 @@ void PhaseChaitin::post_allocate_copy_removal() { regnd.map(ureg, def); // Record other half of doubles uint def_ideal_reg = def->ideal_reg(); - int n_regs = RegMask::num_registers(def_ideal_reg); + int n_regs = RegMask::num_registers(def_ideal_reg, lrgs(_lrg_map.live_range_id(def))); for (int l = 1; l < n_regs; l++) { OptoReg::Name ureg_lo = OptoReg::add(ureg,-l); if (!value[ureg_lo] && @@ -707,7 +725,7 @@ void PhaseChaitin::post_allocate_copy_removal() { } uint n_ideal_reg = n->ideal_reg(); - int n_regs = RegMask::num_registers(n_ideal_reg); + int n_regs = RegMask::num_registers(n_ideal_reg, lrgs(lidx)); if (n_regs == 1) { // If Node 'n' does not change the value mapped by the register, // then 'n' is a useless copy. Do not update the register->node diff --git a/src/hotspot/share/opto/regmask.cpp b/src/hotspot/share/opto/regmask.cpp index 2e04c42eb..dd9b5476b 100644 --- a/src/hotspot/share/opto/regmask.cpp +++ b/src/hotspot/share/opto/regmask.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -24,6 +24,7 @@ #include "precompiled.hpp" #include "opto/ad.hpp" +#include "opto/chaitin.hpp" #include "opto/compile.hpp" #include "opto/matcher.hpp" #include "opto/node.hpp" @@ -116,30 +117,47 @@ const RegMask RegMask::Empty( //============================================================================= bool RegMask::is_vector(uint ireg) { - return (ireg == Op_VecS || ireg == Op_VecD || + return (ireg == Op_VecA || ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ); } int RegMask::num_registers(uint ireg) { switch(ireg) { case Op_VecZ: - return 16; + return SlotsPerVecZ; case Op_VecY: - return 8; + return SlotsPerVecY; case Op_VecX: - return 4; + return SlotsPerVecX; case Op_VecD: + return SlotsPerVecD; case Op_RegD: case Op_RegL: #ifdef _LP64 case Op_RegP: #endif return 2; + case Op_VecA: + assert(Matcher::supports_scalable_vector(), "does not support scalable vector"); + return SlotsPerVecA; } // Op_VecS and the rest ideal registers. return 1; } +int RegMask::num_registers(uint ireg, LRG &lrg) { + int n_regs = num_registers(ireg); + + // assigned is OptoReg which is selected by register allocator + OptoReg::Name assigned = lrg.reg(); + assert(OptoReg::is_valid(assigned), "should be valid opto register"); + + if (lrg.is_scalable() && OptoReg::is_stack(assigned)) { + n_regs = lrg.scalable_reg_slots(); + } + return n_regs; +} + //------------------------------find_first_pair-------------------------------- // Find the lowest-numbered register pair in the mask. Return the // HIGHEST register number in the pair, or BAD if no pairs. @@ -238,14 +256,30 @@ int RegMask::is_bound_pair() const { return true; } +// Check that whether given reg number with size is valid +// for current regmask, where reg is the highest number. +bool RegMask::is_valid_reg(OptoReg::Name reg, const int size) const { + for (int i = 0; i < size; i++) { + if (!Member(reg - i)) { + return false; + } + } + return true; +} + // only indicies of power 2 are accessed, so index 3 is only filled in for storage. static int low_bits[5] = { 0x55555555, 0x11111111, 0x01010101, 0x00000000, 0x00010001 }; //------------------------------find_first_set--------------------------------- // Find the lowest-numbered register set in the mask. Return the // HIGHEST register number in the set, or BAD if no sets. // Works also for size 1. -OptoReg::Name RegMask::find_first_set(const int size) const { - verify_sets(size); +OptoReg::Name RegMask::find_first_set(LRG &lrg, const int size) const { + if (lrg.is_scalable()) { + // For scalable vector register, regmask is SlotsPerVecA bits aligned. + assert(is_aligned_sets(SlotsPerVecA), "mask is not aligned, adjacent sets"); + } else { + assert(is_aligned_sets(size), "mask is not aligned, adjacent sets"); + } for (int i = 0; i < RM_SIZE; i++) { if (_A[i]) { // Found some bits int bit = _A[i] & -_A[i]; // Extract low bit @@ -325,12 +359,16 @@ bool RegMask::is_aligned_sets(const int size) const { while (bits) { // Check bits for pairing int bit = bits & -bits; // Extract low bit // Low bit is not odd means its mis-aligned. - if ((bit & low_bits_mask) == 0) return false; + if ((bit & low_bits_mask) == 0) { + return false; + } // Do extra work since (bit << size) may overflow. int hi_bit = bit << (size-1); // high bit int set = hi_bit + ((hi_bit-1) & ~(bit-1)); // Check for aligned adjacent bits in this set - if ((bits & set) != set) return false; + if ((bits & set) != set) { + return false; + } bits -= set; // Remove this set } } diff --git a/src/hotspot/share/opto/regmask.hpp b/src/hotspot/share/opto/regmask.hpp index c64d08795..b733b87ad 100644 --- a/src/hotspot/share/opto/regmask.hpp +++ b/src/hotspot/share/opto/regmask.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -28,6 +28,8 @@ #include "code/vmreg.hpp" #include "opto/optoreg.hpp" +class LRG; + // Some fun naming (textual) substitutions: // // RegMask::get_low_elem() ==> RegMask::find_first_elem() @@ -95,11 +97,13 @@ public: // requirement is internal to the allocator, and independent of any // particular platform. enum { SlotsPerLong = 2, + SlotsPerVecA = 8, SlotsPerVecS = 1, SlotsPerVecD = 2, SlotsPerVecX = 4, SlotsPerVecY = 8, - SlotsPerVecZ = 16 }; + SlotsPerVecZ = 16, + }; // A constructor only used by the ADLC output. All mask fields are filled // in directly. Calls to this look something like RM(1,2,3,4); @@ -204,10 +208,14 @@ public: return false; } + // Check that whether given reg number with size is valid + // for current regmask, where reg is the highest number. + bool is_valid_reg(OptoReg::Name reg, const int size) const; + // Find the lowest-numbered register set in the mask. Return the // HIGHEST register number in the set, or BAD if no sets. // Assert that the mask contains only bit sets. - OptoReg::Name find_first_set(const int size) const; + OptoReg::Name find_first_set(LRG &lrg, const int size) const; // Clear out partial bits; leave only aligned adjacent bit sets of size. void clear_to_sets(const int size); @@ -226,6 +234,7 @@ public: static bool is_vector(uint ireg); static int num_registers(uint ireg); + static int num_registers(uint ireg, LRG &lrg); // Fast overlap test. Non-zero if any registers in common. int overlap( const RegMask &rm ) const { diff --git a/src/hotspot/share/opto/type.cpp b/src/hotspot/share/opto/type.cpp index 223b7a1c6..1b46cb452 100644 --- a/src/hotspot/share/opto/type.cpp +++ b/src/hotspot/share/opto/type.cpp @@ -79,6 +79,7 @@ const Type::TypeInfo Type::_type_info[Type::lastype] = { { Bad, T_ILLEGAL, "vectory:", false, 0, relocInfo::none }, // VectorY { Bad, T_ILLEGAL, "vectorz:", false, 0, relocInfo::none }, // VectorZ #else // all other + { Bad, T_ILLEGAL, "vectora:", false, Op_VecA, relocInfo::none }, // VectorA. { Bad, T_ILLEGAL, "vectors:", false, Op_VecS, relocInfo::none }, // VectorS { Bad, T_ILLEGAL, "vectord:", false, Op_VecD, relocInfo::none }, // VectorD { Bad, T_ILLEGAL, "vectorx:", false, Op_VecX, relocInfo::none }, // VectorX @@ -655,6 +656,10 @@ void Type::Initialize_shared(Compile* current) { // get_zero_type() should not happen for T_CONFLICT _zero_type[T_CONFLICT]= NULL; + if (Matcher::supports_scalable_vector()) { + TypeVect::VECTA = TypeVect::make(T_BYTE, Matcher::scalable_vector_reg_size(T_BYTE)); + } + // Vector predefined types, it needs initialized _const_basic_type[]. if (Matcher::vector_size_supported(T_BYTE,4)) { TypeVect::VECTS = TypeVect::make(T_BYTE,4); @@ -671,6 +676,8 @@ void Type::Initialize_shared(Compile* current) { if (Matcher::vector_size_supported(T_FLOAT,16)) { TypeVect::VECTZ = TypeVect::make(T_FLOAT,16); } + + mreg2type[Op_VecA] = TypeVect::VECTA; mreg2type[Op_VecS] = TypeVect::VECTS; mreg2type[Op_VecD] = TypeVect::VECTD; mreg2type[Op_VecX] = TypeVect::VECTX; @@ -990,6 +997,7 @@ const Type::TYPES Type::dual_type[Type::lastype] = { Bad, // Tuple - handled in v-call Bad, // Array - handled in v-call + Bad, // VectorA - handled in v-call Bad, // VectorS - handled in v-call Bad, // VectorD - handled in v-call Bad, // VectorX - handled in v-call @@ -1890,7 +1898,6 @@ const TypeTuple *TypeTuple::LONG_PAIR; const TypeTuple *TypeTuple::INT_CC_PAIR; const TypeTuple *TypeTuple::LONG_CC_PAIR; - //------------------------------make------------------------------------------- // Make a TypeTuple from the range of a method signature const TypeTuple *TypeTuple::make_range(ciSignature* sig) { @@ -2262,6 +2269,7 @@ bool TypeAry::ary_must_be_exact() const { //==============================TypeVect======================================= // Convenience common pre-built types. +const TypeVect *TypeVect::VECTA = NULL; // vector length agnostic const TypeVect *TypeVect::VECTS = NULL; // 32-bit vectors const TypeVect *TypeVect::VECTD = NULL; // 64-bit vectors const TypeVect *TypeVect::VECTX = NULL; // 128-bit vectors @@ -2272,10 +2280,11 @@ const TypeVect *TypeVect::VECTZ = NULL; // 512-bit vectors const TypeVect* TypeVect::make(const Type *elem, uint length) { BasicType elem_bt = elem->array_element_basic_type(); assert(is_java_primitive(elem_bt), "only primitive types in vector"); - assert(length > 1 && is_power_of_2(length), "vector length is power of 2"); assert(Matcher::vector_size_supported(elem_bt, length), "length in range"); int size = length * type2aelembytes(elem_bt); switch (Matcher::vector_ideal_reg(size)) { + case Op_VecA: + return (TypeVect*)(new TypeVectA(elem, length))->hashcons(); case Op_VecS: return (TypeVect*)(new TypeVectS(elem, length))->hashcons(); case Op_RegL: @@ -2307,7 +2316,7 @@ const Type *TypeVect::xmeet( const Type *t ) const { default: // All else is a mistake typerr(t); - + case VectorA: case VectorS: case VectorD: case VectorX: @@ -2362,6 +2371,8 @@ bool TypeVect::empty(void) const { #ifndef PRODUCT void TypeVect::dump2(Dict &d, uint depth, outputStream *st) const { switch (base()) { + case VectorA: + st->print("vectora["); break; case VectorS: st->print("vectors["); break; case VectorD: diff --git a/src/hotspot/share/opto/type.hpp b/src/hotspot/share/opto/type.hpp index a7eec281e..6787b947d 100644 --- a/src/hotspot/share/opto/type.hpp +++ b/src/hotspot/share/opto/type.hpp @@ -53,6 +53,7 @@ class TypeNarrowKlass; class TypeAry; class TypeTuple; class TypeVect; +class TypeVectA; class TypeVectS; class TypeVectD; class TypeVectX; @@ -87,6 +88,7 @@ public: Tuple, // Method signature or object layout Array, // Array types + VectorA, // (Scalable) Vector types for vector length agnostic VectorS, // 32bit Vector types VectorD, // 64bit Vector types VectorX, // 128bit Vector types @@ -758,6 +760,7 @@ public: virtual const Type *xmeet( const Type *t) const; virtual const Type *xdual() const; // Compute dual right now. + static const TypeVect *VECTA; static const TypeVect *VECTS; static const TypeVect *VECTD; static const TypeVect *VECTX; @@ -769,6 +772,11 @@ public: #endif }; +class TypeVectA : public TypeVect { + friend class TypeVect; + TypeVectA(const Type* elem, uint length) : TypeVect(VectorA, elem, length) {} +}; + class TypeVectS : public TypeVect { friend class TypeVect; TypeVectS(const Type* elem, uint length) : TypeVect(VectorS, elem, length) {} @@ -1619,12 +1627,12 @@ inline const TypeAry *Type::is_ary() const { } inline const TypeVect *Type::is_vect() const { - assert( _base >= VectorS && _base <= VectorZ, "Not a Vector" ); + assert( _base >= VectorA && _base <= VectorZ, "Not a Vector" ); return (TypeVect*)this; } inline const TypeVect *Type::isa_vect() const { - return (_base >= VectorS && _base <= VectorZ) ? (TypeVect*)this : NULL; + return (_base >= VectorA && _base <= VectorZ) ? (TypeVect*)this : NULL; } inline const TypePtr *Type::is_ptr() const { -- 2.19.1
Locations
Projects
Search
Status Monitor
Help
Open Build Service
OBS Manuals
API Documentation
OBS Portal
Reporting a Bug
Contact
Mailing List
Forums
Chat (IRC)
Twitter
Open Build Service (OBS)
is an
openSUSE project
.
浙ICP备2022010568号-2