
Modular multiplication using OpenCL
AndreasStahl Mar 26, 2010 11:34 AM (in response to iLoop)Hi iLoop, I fear we need more information to help you out. What is the exact problem you are facing? Could you share some of your source code?
And how do you want to parallelize the computation? What do you want to accomplish?

Modular multiplication using OpenCL
iLoop Mar 26, 2010 4:18 PM (in response to AndreasStahl)Hi
Im trying to use OpenSSL to generate a RSA key pair like this:
RSA *rsa=NULL;
int RSAKeySize = 1024; ( like to be able to use 2048 and 4096 as well )
rsa = RSA_generate_key(RSAKeySize, 65537, NULL, NULL);
Then i need to send the rsa>n and rsa>e values to the GPU and use the values in encryption a message m ( m^e mod n).
I have found some samples as CUDA code put i hope someone is able to help me put it all together.
What i hope for is to be able to run this RSA encryption is parallel to speed up async encryption.
typedef unsigned char u8; typedef unsigned short u16; typedef unsigned int u32; typedef unsigned long long u64; // Compute the 2adic inverse of x // Simple Newton iteration, cf. exercise 9.12 of C&P 2nd ed. __kernel u32 inv2adic(u32 x) { u32 a; a = x; x = (((x+2)&4)<<1)+x; // correct to 4 bits x *= 2  a*x; // 8 bits x *= 2  a*x; // 16 bits x *= 2  a*x; // 32 bits return x; // x^1 mod 2^32 } __kernel u32 addmul_1(u32 *z, u32 *x, u32 y,int N) { int i; u32 cy; u64 prod; cy = 0; //#pragma unroll for(i=0; i < (N+1); ++i) { prod = (u64)x[i]*(u64)y; prod += cy; prod += z[i]; z[i] = (u32)prod; cy = (u32)(prod>>32); } return cy; } __kernel void redc(u32 *z, u32 *x, u32 *n, const u32 d, u32 *t,int N) { int i,j; u32 m; for(i=0; i < (N+1); ++i) t[i] = x[i]; t[(N+1)] = 0; for(i=0; i < (N+1); ++i) { m = t[0]*d; t[(N+1)] = addmul_1(t, n, m,N); for(j=0; j < (N+1); ++j) // shift t[j] = t[j+1]; t[(N+1)] = 0; } for(i=0; i < (N+1); ++i) z[i] = t[i]; } __kernel void mulredc(u32 *z, u32 *x, u32 *y, u32 *n, const u32 d, u32 *t,int N) { int i,j; u32 m, cy; u64 aux; #pragma unroll for(i=0; i < (N+3); ++i) t[i] = 0; #pragma unroll for(i=0; i < (N+1); ++i) { cy = addmul_1(t, x, y[i],N); // t += x*y[i] aux = (u64)t[(N+1)] + cy; t[(N+1)] = aux; t[(N+2)] = aux>>32; m = t[0]*d; cy = addmul_1(t, n, m,N); aux = (u64)t[(N+1)] + cy; t[(N+1)] = aux; t[(N+2)] = aux>>32; for(j=0; j <= (N+1); ++j) // shift t[j] = t[j+1]; } for(i=0; i < (N+1); ++i) z[i] = t[i]; } __kernel void sqrredc(u32 *z, u32 *x, u32 *n, const u32 d, u32 *t,int N) { int i,j; u32 m, cy; u64 aux, cc; for(i=0; i < (N+3); ++i) t[i] = 0; //#pragma unroll for(i=0; i < (N+1); ++i) { u64 pr; cc = 0; cy = 0; pr = (u64)x[i]*(u64)x[i]; pr += t[i]; pr += cy; t[i] = (u32)pr; cy = (pr>>32); cc = cy; for(j=i+1; j < (N+1); ++j) { u64 ec; pr = (u64)x[j]*(u64)x[i]; ec = pr>>63; pr <<= 1; aux = pr + t[j]; ec += aux<pr; pr = aux; aux = pr + cc; ec += aux<pr; pr = aux; t[j] = (u32)pr; cc = (pr>>32)(ec<<32); } aux = (u64)t[(N+1)] + cc; t[(N+1)] = aux; t[(N+2)] = aux>>32; m = t[0]*d; cy = addmul_1(t, n, m,N); aux = (u64)t[(N+1)] + cy; t[(N+1)] = aux; t[(N+2)] = aux>>32; for(j=0; j <= (N+1); ++j) // shift t[j] = t[j+1]; } for(i=0; i < (N+1); ++i) z[i] = t[i]; }

Modular multiplication using OpenCL
kang13 May 14, 2010 4:38 PM (in response to iLoop)May I ask where you find these code? they looks like just some alrithmetic functions, not complete RSA implementation.
Originally posted by: iLoop Hi
Im trying to use OpenSSL to generate a RSA key pair like this:
RSA *rsa=NULL;

