Projects
Mega:24.03:SP1:Everything
openjdk-1.8.0
_service:tar_scm:0034-8250902-Implement-MD5-Int...
Sign Up
Log In
Username
Password
Overview
Repositories
Revisions
Requests
Users
Attributes
Meta
File _service:tar_scm:0034-8250902-Implement-MD5-Intrinsics-on-x64.patch of Package openjdk-1.8.0
Date: Fri, 9 Jun 2023 09:23:12 +0800 Subject: 8250902:Implement MD5 Intrinsics on x64 Bug url: https://bugs.openjdk.org/browse/JDK-8250902 --- hotspot/src/cpu/x86/vm/assembler_x86.cpp | 10 ++ hotspot/src/cpu/x86/vm/assembler_x86.hpp | 2 + hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp | 153 ++++++++++++++++++ hotspot/src/cpu/x86/vm/macroAssembler_x86.hpp | 3 + .../src/cpu/x86/vm/stubGenerator_x86_32.cpp | 44 +++++ .../src/cpu/x86/vm/stubGenerator_x86_64.cpp | 42 +++++ hotspot/src/cpu/x86/vm/vm_version_x86.cpp | 5 - hotspot/src/share/vm/asm/assembler.hpp | 3 + hotspot/src/share/vm/asm/codeBuffer.hpp | 13 ++ .../intrinsics/IntrinsicAvailableTest.java | 2 +- .../GenericTestCaseForUnsupportedX86CPU.java | 6 +- 11 files changed, 276 insertions(+), 7 deletions(-) diff --git a/hotspot/src/cpu/x86/vm/assembler_x86.cpp b/hotspot/src/cpu/x86/vm/assembler_x86.cpp index ddc1acfd8..c0ae3d32a 100644 --- a/hotspot/src/cpu/x86/vm/assembler_x86.cpp +++ b/hotspot/src/cpu/x86/vm/assembler_x86.cpp @@ -2808,6 +2808,16 @@ void Assembler::ret(int imm16) { } } +void Assembler::roll(Register dst, int imm8) { + assert(isShiftCount(imm8 >> 1), "illegal shift count"); + int encode = prefix_and_encode(dst->encoding()); + if (imm8 == 1) { + emit_int16((unsigned char)0xD1, (0xC0 | encode)); + } else { + emit_int24((unsigned char)0xC1, (0xc0 | encode), imm8); + } +} + void Assembler::sahf() { #ifdef _LP64 // Not supported in 64bit mode diff --git a/hotspot/src/cpu/x86/vm/assembler_x86.hpp b/hotspot/src/cpu/x86/vm/assembler_x86.hpp index c2e70bc2a..1695d7969 100644 --- a/hotspot/src/cpu/x86/vm/assembler_x86.hpp +++ b/hotspot/src/cpu/x86/vm/assembler_x86.hpp @@ -1574,6 +1574,8 @@ private: void ret(int imm16); + void roll(Register dst, int imm8); + #ifdef _LP64 void rorq(Register dst, int imm8); void rorxq(Register dst, Register src, int imm8); diff --git a/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp b/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp index 1b09514c9..3aca9a30d 100644 --- 
a/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp +++ b/hotspot/src/cpu/x86/vm/macroAssembler_x86.cpp @@ -8577,3 +8577,156 @@ SkipIfEqual::SkipIfEqual( SkipIfEqual::~SkipIfEqual() { _masm->bind(_label); } + +void MacroAssembler::fast_md5(Register buf, Address state, Address ofs, Address limit, bool multi_block) { + + Label start, done_hash, loop0; + + bind(start); + + bind(loop0); + + // Save hash values for addition after rounds + movptr(rdi, state); + movl(rax, Address(rdi, 0)); + movl(rbx, Address(rdi, 4)); + movl(rcx, Address(rdi, 8)); + movl(rdx, Address(rdi, 12)); + +#define FF(r1, r2, r3, r4, k, s, t) \ + movl(rsi, r3); \ + addl(r1, Address(buf, k*4)); \ + xorl(rsi, r4); \ + andl(rsi, r2); \ + xorl(rsi, r4); \ + leal(r1, Address(r1, rsi, Address::times_1, t)); \ + roll(r1, s); \ + addl(r1, r2); + +#define GG(r1, r2, r3, r4, k, s, t) \ + movl(rsi, r4); \ + movl(rdi, r4); \ + addl(r1, Address(buf, k*4)); \ + notl(rsi); \ + andl(rdi, r2); \ + andl(rsi, r3); \ + orl(rsi, rdi); \ + leal(r1, Address(r1, rsi, Address::times_1, t)); \ + roll(r1, s); \ + addl(r1, r2); + +#define HH(r1, r2, r3, r4, k, s, t) \ + movl(rsi, r3); \ + addl(r1, Address(buf, k*4)); \ + xorl(rsi, r4); \ + xorl(rsi, r2); \ + leal(r1, Address(r1, rsi, Address::times_1, t)); \ + roll(r1, s); \ + addl(r1, r2); + +#define II(r1, r2, r3, r4, k, s, t) \ + movl(rsi, r4); \ + notl(rsi); \ + addl(r1, Address(buf, k*4)); \ + orl(rsi, r2); \ + xorl(rsi, r3); \ + leal(r1, Address(r1, rsi, Address::times_1, t)); \ + roll(r1, s); \ + addl(r1, r2); + + // Round 1 + FF(rax, rbx, rcx, rdx, 0, 7, 0xd76aa478) + FF(rdx, rax, rbx, rcx, 1, 12, 0xe8c7b756) + FF(rcx, rdx, rax, rbx, 2, 17, 0x242070db) + FF(rbx, rcx, rdx, rax, 3, 22, 0xc1bdceee) + FF(rax, rbx, rcx, rdx, 4, 7, 0xf57c0faf) + FF(rdx, rax, rbx, rcx, 5, 12, 0x4787c62a) + FF(rcx, rdx, rax, rbx, 6, 17, 0xa8304613) + FF(rbx, rcx, rdx, rax, 7, 22, 0xfd469501) + FF(rax, rbx, rcx, rdx, 8, 7, 0x698098d8) + FF(rdx, rax, rbx, rcx, 9, 12, 0x8b44f7af) + FF(rcx, rdx, 
rax, rbx, 10, 17, 0xffff5bb1) + FF(rbx, rcx, rdx, rax, 11, 22, 0x895cd7be) + FF(rax, rbx, rcx, rdx, 12, 7, 0x6b901122) + FF(rdx, rax, rbx, rcx, 13, 12, 0xfd987193) + FF(rcx, rdx, rax, rbx, 14, 17, 0xa679438e) + FF(rbx, rcx, rdx, rax, 15, 22, 0x49b40821) + + // Round 2 + GG(rax, rbx, rcx, rdx, 1, 5, 0xf61e2562) + GG(rdx, rax, rbx, rcx, 6, 9, 0xc040b340) + GG(rcx, rdx, rax, rbx, 11, 14, 0x265e5a51) + GG(rbx, rcx, rdx, rax, 0, 20, 0xe9b6c7aa) + GG(rax, rbx, rcx, rdx, 5, 5, 0xd62f105d) + GG(rdx, rax, rbx, rcx, 10, 9, 0x02441453) + GG(rcx, rdx, rax, rbx, 15, 14, 0xd8a1e681) + GG(rbx, rcx, rdx, rax, 4, 20, 0xe7d3fbc8) + GG(rax, rbx, rcx, rdx, 9, 5, 0x21e1cde6) + GG(rdx, rax, rbx, rcx, 14, 9, 0xc33707d6) + GG(rcx, rdx, rax, rbx, 3, 14, 0xf4d50d87) + GG(rbx, rcx, rdx, rax, 8, 20, 0x455a14ed) + GG(rax, rbx, rcx, rdx, 13, 5, 0xa9e3e905) + GG(rdx, rax, rbx, rcx, 2, 9, 0xfcefa3f8) + GG(rcx, rdx, rax, rbx, 7, 14, 0x676f02d9) + GG(rbx, rcx, rdx, rax, 12, 20, 0x8d2a4c8a) + + // Round 3 + HH(rax, rbx, rcx, rdx, 5, 4, 0xfffa3942) + HH(rdx, rax, rbx, rcx, 8, 11, 0x8771f681) + HH(rcx, rdx, rax, rbx, 11, 16, 0x6d9d6122) + HH(rbx, rcx, rdx, rax, 14, 23, 0xfde5380c) + HH(rax, rbx, rcx, rdx, 1, 4, 0xa4beea44) + HH(rdx, rax, rbx, rcx, 4, 11, 0x4bdecfa9) + HH(rcx, rdx, rax, rbx, 7, 16, 0xf6bb4b60) + HH(rbx, rcx, rdx, rax, 10, 23, 0xbebfbc70) + HH(rax, rbx, rcx, rdx, 13, 4, 0x289b7ec6) + HH(rdx, rax, rbx, rcx, 0, 11, 0xeaa127fa) + HH(rcx, rdx, rax, rbx, 3, 16, 0xd4ef3085) + HH(rbx, rcx, rdx, rax, 6, 23, 0x04881d05) + HH(rax, rbx, rcx, rdx, 9, 4, 0xd9d4d039) + HH(rdx, rax, rbx, rcx, 12, 11, 0xe6db99e5) + HH(rcx, rdx, rax, rbx, 15, 16, 0x1fa27cf8) + HH(rbx, rcx, rdx, rax, 2, 23, 0xc4ac5665) + + // Round 4 + II(rax, rbx, rcx, rdx, 0, 6, 0xf4292244) + II(rdx, rax, rbx, rcx, 7, 10, 0x432aff97) + II(rcx, rdx, rax, rbx, 14, 15, 0xab9423a7) + II(rbx, rcx, rdx, rax, 5, 21, 0xfc93a039) + II(rax, rbx, rcx, rdx, 12, 6, 0x655b59c3) + II(rdx, rax, rbx, rcx, 3, 10, 0x8f0ccc92) + II(rcx, rdx, rax, rbx, 10, 
15, 0xffeff47d) + II(rbx, rcx, rdx, rax, 1, 21, 0x85845dd1) + II(rax, rbx, rcx, rdx, 8, 6, 0x6fa87e4f) + II(rdx, rax, rbx, rcx, 15, 10, 0xfe2ce6e0) + II(rcx, rdx, rax, rbx, 6, 15, 0xa3014314) + II(rbx, rcx, rdx, rax, 13, 21, 0x4e0811a1) + II(rax, rbx, rcx, rdx, 4, 6, 0xf7537e82) + II(rdx, rax, rbx, rcx, 11, 10, 0xbd3af235) + II(rcx, rdx, rax, rbx, 2, 15, 0x2ad7d2bb) + II(rbx, rcx, rdx, rax, 9, 21, 0xeb86d391) + +#undef FF +#undef GG +#undef HH +#undef II + + // write hash values back in the correct order + movptr(rdi, state); + addl(Address(rdi, 0), rax); + addl(Address(rdi, 4), rbx); + addl(Address(rdi, 8), rcx); + addl(Address(rdi, 12), rdx); + + if (multi_block) { + // increment data pointer and loop if more to process + addptr(buf, 64); + addl(ofs, 64); + movl(rsi, ofs); + cmpl(rsi, limit); + jcc(Assembler::belowEqual, loop0); + movptr(rax, rsi); //return ofs + } + + bind(done_hash); +} diff --git a/hotspot/src/cpu/x86/vm/macroAssembler_x86.hpp b/hotspot/src/cpu/x86/vm/macroAssembler_x86.hpp index e94fdd7d7..c18645f18 100644 --- a/hotspot/src/cpu/x86/vm/macroAssembler_x86.hpp +++ b/hotspot/src/cpu/x86/vm/macroAssembler_x86.hpp @@ -907,6 +907,9 @@ class MacroAssembler: public Assembler { // computes pow(x,y). Fallback to runtime call included. void pow_with_fallback(int num_fpu_regs_in_use) { pow_or_exp(false, num_fpu_regs_in_use); } + void fast_md5(Register buf, Address state, Address ofs, Address limit, + bool multi_block); + private: // call runtime as a fallback for trig functions and pow/exp. diff --git a/hotspot/src/cpu/x86/vm/stubGenerator_x86_32.cpp b/hotspot/src/cpu/x86/vm/stubGenerator_x86_32.cpp index f555f3326..b4e3f2914 100644 --- a/hotspot/src/cpu/x86/vm/stubGenerator_x86_32.cpp +++ b/hotspot/src/cpu/x86/vm/stubGenerator_x86_32.cpp @@ -3057,6 +3057,45 @@ class StubGenerator: public StubCodeGenerator { return start; } + // ofs and limit are used for multi-block byte array. 
+ // int com.sun.security.provider.MD5.implCompress(byte[] b, int ofs) + address generate_md5_implCompress(bool multi_block, const char *name) { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); + + const Register buf_param = rbp; + const Address state_param(rsp, 0 * wordSize); + const Address ofs_param (rsp, 1 * wordSize); + const Address limit_param(rsp, 2 * wordSize); + + __ enter(); + __ push(rbx); + __ push(rdi); + __ push(rsi); + __ push(rbp); + __ subptr(rsp, 3 * wordSize); + + __ movptr(rsi, Address(rbp, 8 + 4)); + __ movptr(state_param, rsi); + if (multi_block) { + __ movptr(rsi, Address(rbp, 8 + 8)); + __ movptr(ofs_param, rsi); + __ movptr(rsi, Address(rbp, 8 + 12)); + __ movptr(limit_param, rsi); + } + __ movptr(buf_param, Address(rbp, 8 + 0)); // do it last because it overrides rbp + __ fast_md5(buf_param, state_param, ofs_param, limit_param, multi_block); + + __ addptr(rsp, 3 * wordSize); + __ pop(rbp); + __ pop(rsi); + __ pop(rdi); + __ pop(rbx); + __ leave(); + __ ret(0); + return start; + } // byte swap x86 long address generate_ghash_long_swap_mask() { @@ -3525,6 +3564,11 @@ class StubGenerator: public StubCodeGenerator { StubRoutines::_counterMode_AESCrypt = generate_counterMode_AESCrypt_Parallel(); } + if (UseMD5Intrinsics) { + StubRoutines::_md5_implCompress = generate_md5_implCompress(false, "md5_implCompress"); + StubRoutines::_md5_implCompressMB = generate_md5_implCompress(true, "md5_implCompressMB"); + } + // Generate GHASH intrinsics code if (UseGHASHIntrinsics) { StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask(); diff --git a/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp b/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp index 254f63392..f6511b273 100644 --- a/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp +++ b/hotspot/src/cpu/x86/vm/stubGenerator_x86_64.cpp @@ -3660,6 +3660,43 @@ class StubGenerator: public StubCodeGenerator { return start; } + // 
ofs and limit are used for multi-block byte array. + // int com.sun.security.provider.MD5.implCompress(byte[] b, int ofs) + address generate_md5_implCompress(bool multi_block, const char *name) { + __ align(CodeEntryAlignment); + StubCodeMark mark(this, "StubRoutines", name); + address start = __ pc(); + + const Register buf_param = r15; + const Address state_param(rsp, 0 * wordSize); + const Address ofs_param (rsp, 1 * wordSize ); + const Address limit_param(rsp, 1 * wordSize + 4); + + __ enter(); + __ push(rbx); + __ push(rdi); + __ push(rsi); + __ push(r15); + __ subptr(rsp, 2 * wordSize); + + __ movptr(buf_param, c_rarg0); + __ movptr(state_param, c_rarg1); + if (multi_block) { + __ movl(ofs_param, c_rarg2); + __ movl(limit_param, c_rarg3); + } + __ fast_md5(buf_param, state_param, ofs_param, limit_param, multi_block); + + __ addptr(rsp, 2 * wordSize); + __ pop(r15); + __ pop(rsi); + __ pop(rdi); + __ pop(rbx); + __ leave(); + __ ret(0); + return start; + } + // This is a version of CTR/AES crypt which does 6 blocks in a loop at a time // to hide instruction latency // @@ -4584,6 +4621,11 @@ class StubGenerator: public StubCodeGenerator { StubRoutines::_counterMode_AESCrypt = generate_counterMode_AESCrypt_Parallel(); } + if (UseMD5Intrinsics) { + StubRoutines::_md5_implCompress = generate_md5_implCompress(false, "md5_implCompress"); + StubRoutines::_md5_implCompressMB = generate_md5_implCompress(true, "md5_implCompressMB"); + } + // Generate GHASH intrinsics code if (UseGHASHIntrinsics) { StubRoutines::x86::_ghash_long_swap_mask_addr = generate_ghash_long_swap_mask(); diff --git a/hotspot/src/cpu/x86/vm/vm_version_x86.cpp b/hotspot/src/cpu/x86/vm/vm_version_x86.cpp index 41f827364..ce3037d76 100644 --- a/hotspot/src/cpu/x86/vm/vm_version_x86.cpp +++ b/hotspot/src/cpu/x86/vm/vm_version_x86.cpp @@ -669,11 +669,6 @@ void VM_Version::get_processor_features() { FLAG_SET_DEFAULT(UseF2jBLASIntrinsics, false); } - if (UseMD5Intrinsics) { - warning("MD5 intrinsics are 
not available on this CPU"); - FLAG_SET_DEFAULT(UseMD5Intrinsics, false); - } - // Adjust RTM (Restricted Transactional Memory) flags if (!supports_rtm() && UseRTMLocking) { // Can't continue because UseRTMLocking affects UseBiasedLocking flag diff --git a/hotspot/src/share/vm/asm/assembler.hpp b/hotspot/src/share/vm/asm/assembler.hpp index d4d31d47e..823653d55 100644 --- a/hotspot/src/share/vm/asm/assembler.hpp +++ b/hotspot/src/share/vm/asm/assembler.hpp @@ -287,6 +287,9 @@ class AbstractAssembler : public ResourceObj { void emit_int8( int8_t x) { code_section()->emit_int8( x); } void emit_int16( int16_t x) { code_section()->emit_int16( x); } + void emit_int16( uint8_t x1, uint8_t x2) { code_section()->emit_int16(x1, x2); } + + void emit_int24( uint8_t x1, uint8_t x2, uint8_t x3) { code_section()->emit_int24(x1, x2, x3); } void emit_int32( int32_t x) { code_section()->emit_int32( x); } void emit_int64( int64_t x) { code_section()->emit_int64( x); } diff --git a/hotspot/src/share/vm/asm/codeBuffer.hpp b/hotspot/src/share/vm/asm/codeBuffer.hpp index a89f2c18b..32c2f717a 100644 --- a/hotspot/src/share/vm/asm/codeBuffer.hpp +++ b/hotspot/src/share/vm/asm/codeBuffer.hpp @@ -195,6 +195,19 @@ class CodeSection VALUE_OBJ_CLASS_SPEC { // Code emission void emit_int8 ( int8_t x) { *((int8_t*) end()) = x; set_end(end() + sizeof(int8_t)); } void emit_int16( int16_t x) { *((int16_t*) end()) = x; set_end(end() + sizeof(int16_t)); } + void emit_int16(uint8_t x1, uint8_t x2) { + address curr = end(); + *((uint8_t*) curr++) = x1; + *((uint8_t*) curr++) = x2; + set_end(curr); + } + void emit_int24(uint8_t x1, uint8_t x2, uint8_t x3) { + address curr = end(); + *((uint8_t*) curr++) = x1; + *((uint8_t*) curr++) = x2; + *((uint8_t*) curr++) = x3; + set_end(curr); + } void emit_int32( int32_t x) { *((int32_t*) end()) = x; set_end(end() + sizeof(int32_t)); } void emit_int64( int64_t x) { *((int64_t*) end()) = x; set_end(end() + sizeof(int64_t)); } diff --git 
a/hotspot/test/compiler/intrinsics/IntrinsicAvailableTest.java b/hotspot/test/compiler/intrinsics/IntrinsicAvailableTest.java index 1a5475403..b52c6f523 100644 --- a/hotspot/test/compiler/intrinsics/IntrinsicAvailableTest.java +++ b/hotspot/test/compiler/intrinsics/IntrinsicAvailableTest.java @@ -26,7 +26,7 @@ import java.util.Objects; /* * @test * @bug 8130832 - * @library /testlibrary /../../test/lib /compiler/whitebox /compiler/testlibrary + * @library /testlibrary /testlibrary/whitebox /compiler/whitebox /compiler/testlibrary * @build IntrinsicAvailableTest * @run main ClassFileInstaller sun.hotspot.WhiteBox * sun.hotspot.WhiteBox$WhiteBoxPermission diff --git a/hotspot/test/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedX86CPU.java b/hotspot/test/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedX86CPU.java index 8a8dde4ad..a916ac746 100644 --- a/hotspot/test/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedX86CPU.java +++ b/hotspot/test/compiler/intrinsics/sha/cli/testcases/GenericTestCaseForUnsupportedX86CPU.java @@ -24,7 +24,9 @@ import com.oracle.java.testlibrary.ExitCode; import com.oracle.java.testlibrary.Platform; import com.oracle.java.testlibrary.cli.CommandLineOptionTest; +import com.oracle.java.testlibrary.cli.predicate.AndPredicate; import com.oracle.java.testlibrary.cli.predicate.OrPredicate; +import com.oracle.java.testlibrary.cli.predicate.NotPredicate; /** * Generic test case for SHA-related options targeted to X86 CPUs that don't @@ -33,7 +35,9 @@ import com.oracle.java.testlibrary.cli.predicate.OrPredicate; public class GenericTestCaseForUnsupportedX86CPU extends DigestOptionsBase.TestCase { public GenericTestCaseForUnsupportedX86CPU(String optionName) { - super(optionName, new OrPredicate(Platform::isX64, Platform::isX86)); + super(optionName, new AndPredicate(new OrPredicate(Platform::isX64, Platform::isX86), + new NotPredicate(DigestOptionsBase.getPredicateForOption( + optionName)))); 
} @Override -- 2.22.0
Locations
Projects
Search
Status Monitor
Help
Open Build Service
OBS Manuals
API Documentation
OBS Portal
Reporting a Bug
Contact
Mailing List
Forums
Chat (IRC)
Twitter
Open Build Service (OBS)
is an
openSUSE project
.
浙ICP备2022010568号-2