FileDocCategorySizeDatePackage
AESFastEngine.javaAPI DocAndroid 1.5 API46716Wed May 06 22:41:06 BST 2009org.bouncycastle.crypto.engines

AESFastEngine

public class AESFastEngine extends Object implements org.bouncycastle.crypto.BlockCipher
an implementation of the AES (Rijndael), from FIPS-197.

For further details see: http://csrc.nist.gov/encryption/aes/. This implementation is based on optimizations from Dr. Brian Gladman's paper and C code at http://fp.gladman.plus.com/cryptography_technology/rijndael/ There are three levels of tradeoff of speed vs memory Because java has no preprocessor, they are written as three separate classes from which to choose The fastest uses 8Kbytes of static tables to precompute round calculations, 4 256 word tables for encryption and 4 for decryption. The middle performance version uses only one 256 word table for each, for a total of 2Kbytes, adding 12 rotate operations per round to compute the values contained in the other tables from the contents of the first The slowest version uses no static tables at all and computes the values in each round

This file contains the fast version with 8Kbytes of static tables for round precomputation

Fields Summary
private static final byte[]
S
private static final byte[]
Si
private static final int[]
rcon
private static final int[]
T0
private static final int[]
T1
private static final int[]
T2
private static final int[]
T3
private static final int[]
Tinv0
private static final int[]
Tinv1
private static final int[]
Tinv2
private static final int[]
Tinv3
private static final int
m1
private static final int
m2
private static final int
m3
private int
ROUNDS
private int[]
WorkingKey
private int
C0
private int
C1
private int
C2
private int
C3
private boolean
forEncryption
private static final int
BLOCK_SIZE
Constructors Summary
public AESFastEngine()
default constructor - 128 bit block size.


                
     
    
    
Methods Summary
private intFFmulX(int x)


       
    
        return (((x & m2) << 1) ^ (((x & m1) >>> 7) * m3));
    
private final voiddecryptBlock(int[][] KW)

        int r0, r1, r2, r3;

        C0 ^= KW[ROUNDS][0];
        C1 ^= KW[ROUNDS][1];
        C2 ^= KW[ROUNDS][2];
        C3 ^= KW[ROUNDS][3];

        int r = ROUNDS-1; 
        
        while (r>1) 
        {
            r0 = Tinv0[C0&255] ^ Tinv1[(C3>>8)&255] ^ Tinv2[(C2>>16)&255] ^ Tinv3[(C1>>24)&255] ^ KW[r][0];
            r1 = Tinv0[C1&255] ^ Tinv1[(C0>>8)&255] ^ Tinv2[(C3>>16)&255] ^ Tinv3[(C2>>24)&255] ^ KW[r][1];
            r2 = Tinv0[C2&255] ^ Tinv1[(C1>>8)&255] ^ Tinv2[(C0>>16)&255] ^ Tinv3[(C3>>24)&255] ^ KW[r][2];
            r3 = Tinv0[C3&255] ^ Tinv1[(C2>>8)&255] ^ Tinv2[(C1>>16)&255] ^ Tinv3[(C0>>24)&255] ^ KW[r--][3];
            C0 = Tinv0[r0&255] ^ Tinv1[(r3>>8)&255] ^ Tinv2[(r2>>16)&255] ^ Tinv3[(r1>>24)&255] ^ KW[r][0];
            C1 = Tinv0[r1&255] ^ Tinv1[(r0>>8)&255] ^ Tinv2[(r3>>16)&255] ^ Tinv3[(r2>>24)&255] ^ KW[r][1];
            C2 = Tinv0[r2&255] ^ Tinv1[(r1>>8)&255] ^ Tinv2[(r0>>16)&255] ^ Tinv3[(r3>>24)&255] ^ KW[r][2];
            C3 = Tinv0[r3&255] ^ Tinv1[(r2>>8)&255] ^ Tinv2[(r1>>16)&255] ^ Tinv3[(r0>>24)&255] ^ KW[r--][3];
        }

        r0 = Tinv0[C0&255] ^ Tinv1[(C3>>8)&255] ^ Tinv2[(C2>>16)&255] ^ Tinv3[(C1>>24)&255] ^ KW[r][0];
        r1 = Tinv0[C1&255] ^ Tinv1[(C0>>8)&255] ^ Tinv2[(C3>>16)&255] ^ Tinv3[(C2>>24)&255] ^ KW[r][1];
        r2 = Tinv0[C2&255] ^ Tinv1[(C1>>8)&255] ^ Tinv2[(C0>>16)&255] ^ Tinv3[(C3>>24)&255] ^ KW[r][2];
        r3 = Tinv0[C3&255] ^ Tinv1[(C2>>8)&255] ^ Tinv2[(C1>>16)&255] ^ Tinv3[(C0>>24)&255] ^ KW[r--][3];
        
        // the final round's table is a simple function of Si so we don't use a whole other four tables for it

        C0 = (Si[r0&255]&255) ^ ((Si[(r3>>8)&255]&255)<<8) ^ ((Si[(r2>>16)&255]&255)<<16) ^ (Si[(r1>>24)&255]<<24) ^ KW[0][0];
        C1 = (Si[r1&255]&255) ^ ((Si[(r0>>8)&255]&255)<<8) ^ ((Si[(r3>>16)&255]&255)<<16) ^ (Si[(r2>>24)&255]<<24) ^ KW[0][1];
        C2 = (Si[r2&255]&255) ^ ((Si[(r1>>8)&255]&255)<<8) ^ ((Si[(r0>>16)&255]&255)<<16) ^ (Si[(r3>>24)&255]<<24) ^ KW[0][2];
        C3 = (Si[r3&255]&255) ^ ((Si[(r2>>8)&255]&255)<<8) ^ ((Si[(r1>>16)&255]&255)<<16) ^ (Si[(r0>>24)&255]<<24) ^ KW[0][3];
    
private final voidencryptBlock(int[][] KW)

        int r, r0, r1, r2, r3;
        
        C0 ^= KW[0][0];
        C1 ^= KW[0][1];
        C2 ^= KW[0][2];
        C3 ^= KW[0][3];

        r = 1;
        while (r < ROUNDS - 1)
        {
            r0 = T0[C0&255] ^ T1[(C1>>8)&255] ^ T2[(C2>>16)&255] ^ T3[(C3>>24)&255] ^ KW[r][0];
            r1 = T0[C1&255] ^ T1[(C2>>8)&255] ^ T2[(C3>>16)&255] ^ T3[(C0>>24)&255] ^ KW[r][1];
            r2 = T0[C2&255] ^ T1[(C3>>8)&255] ^ T2[(C0>>16)&255] ^ T3[(C1>>24)&255] ^ KW[r][2];
            r3 = T0[C3&255] ^ T1[(C0>>8)&255] ^ T2[(C1>>16)&255] ^ T3[(C2>>24)&255] ^ KW[r++][3];
            C0 = T0[r0&255] ^ T1[(r1>>8)&255] ^ T2[(r2>>16)&255] ^ T3[(r3>>24)&255] ^ KW[r][0];
            C1 = T0[r1&255] ^ T1[(r2>>8)&255] ^ T2[(r3>>16)&255] ^ T3[(r0>>24)&255] ^ KW[r][1];
            C2 = T0[r2&255] ^ T1[(r3>>8)&255] ^ T2[(r0>>16)&255] ^ T3[(r1>>24)&255] ^ KW[r][2];
            C3 = T0[r3&255] ^ T1[(r0>>8)&255] ^ T2[(r1>>16)&255] ^ T3[(r2>>24)&255] ^ KW[r++][3];
        }

        r0 = T0[C0&255] ^ T1[(C1>>8)&255] ^ T2[(C2>>16)&255] ^ T3[(C3>>24)&255] ^ KW[r][0];
        r1 = T0[C1&255] ^ T1[(C2>>8)&255] ^ T2[(C3>>16)&255] ^ T3[(C0>>24)&255] ^ KW[r][1];
        r2 = T0[C2&255] ^ T1[(C3>>8)&255] ^ T2[(C0>>16)&255] ^ T3[(C1>>24)&255] ^ KW[r][2];
        r3 = T0[C3&255] ^ T1[(C0>>8)&255] ^ T2[(C1>>16)&255] ^ T3[(C2>>24)&255] ^ KW[r++][3];
        
        // the final round's table is a simple function of S so we don't use a whole other four tables for it

        C0 = (S[r0&255]&255) ^ ((S[(r1>>8)&255]&255)<<8) ^ ((S[(r2>>16)&255]&255)<<16) ^ (S[(r3>>24)&255]<<24) ^ KW[r][0];
        C1 = (S[r1&255]&255) ^ ((S[(r2>>8)&255]&255)<<8) ^ ((S[(r3>>16)&255]&255)<<16) ^ (S[(r0>>24)&255]<<24) ^ KW[r][1];
        C2 = (S[r2&255]&255) ^ ((S[(r3>>8)&255]&255)<<8) ^ ((S[(r0>>16)&255]&255)<<16) ^ (S[(r1>>24)&255]<<24) ^ KW[r][2];
        C3 = (S[r3&255]&255) ^ ((S[(r0>>8)&255]&255)<<8) ^ ((S[(r1>>16)&255]&255)<<16) ^ (S[(r2>>24)&255]<<24) ^ KW[r][3];

    
private int[][]generateWorkingKey(byte[] key, boolean forEncryption)
Calculate the necessary round keys The number of calculations depends on key size and block size AES specified a fixed block size of 128 bits and key sizes 128/192/256 bits This code is written assuming those are the only possible values

        int         KC = key.length / 4;  // key length in words
        int         t;
        
        if (((KC != 4) && (KC != 6) && (KC != 8)) || ((KC * 4) != key.length))
        {
            throw new IllegalArgumentException("Key length not 128/192/256 bits.");
        }

        ROUNDS = KC + 6;  // This is not always true for the generalized Rijndael that allows larger block sizes
        int[][] W = new int[ROUNDS+1][4];   // 4 words in a block
        
        //
        // copy the key into the round key array
        //
        
        t = 0;
        int i = 0;
        while (i < key.length)
        {
            W[t >> 2][t & 3] = (key[i]&0xff) | ((key[i+1]&0xff) << 8) | ((key[i+2]&0xff) << 16) | (key[i+3] << 24);
            i+=4;
            t++;
        }
        
        //
        // while not enough round key material calculated
        // calculate new values
        //
        int k = (ROUNDS + 1) << 2;
        for (i = KC; (i < k); i++)
        {
            int temp = W[(i - 1) >> 2][(i - 1) & 3];
            if ((i % KC) == 0)
            {
                temp = subWord(shift(temp, 8)) ^ rcon[(i / KC) - 1];
            }
            else if ((KC > 6) && ((i % KC) == 4))
            {
                temp = subWord(temp);
            }

            W[i >> 2][i & 3] = W[(i - KC) >> 2][(i - KC) & 3] ^ temp;
        }

        if (!forEncryption)
        {
            for (int j = 1; j < ROUNDS; j++)
            {
                for (i = 0; i < 4; i++)
                {
                    W[j][i] = inv_mcol(W[j][i]);
                }
            }
        }

        return W;
    
public java.lang.StringgetAlgorithmName()

        return "AES";
    
public intgetBlockSize()

        return BLOCK_SIZE;
    
public voidinit(boolean forEncryption, org.bouncycastle.crypto.CipherParameters params)
initialise an AES cipher.

param
forEncryption whether or not we are for encryption.
param
params the parameters required to set up the cipher.
exception
IllegalArgumentException if the params argument is inappropriate.

        if (params instanceof KeyParameter)
        {
            WorkingKey = generateWorkingKey(((KeyParameter)params).getKey(), forEncryption);
            this.forEncryption = forEncryption;
            return;
        }

        throw new IllegalArgumentException("invalid parameter passed to AES init - " + params.getClass().getName());
    
private intinv_mcol(int x)

        int f2 = FFmulX(x);
        int f4 = FFmulX(f2);
        int f8 = FFmulX(f4);
        int f9 = x ^ f8;
        
        return f2 ^ f4 ^ f8 ^ shift(f2 ^ f9, 8) ^ shift(f4 ^ f9, 16) ^ shift(f9, 24);
    
private final voidpackBlock(byte[] bytes, int off)

        int     index = off;

        bytes[index++] = (byte)C0;
        bytes[index++] = (byte)(C0 >> 8);
        bytes[index++] = (byte)(C0 >> 16);
        bytes[index++] = (byte)(C0 >> 24);

        bytes[index++] = (byte)C1;
        bytes[index++] = (byte)(C1 >> 8);
        bytes[index++] = (byte)(C1 >> 16);
        bytes[index++] = (byte)(C1 >> 24);

        bytes[index++] = (byte)C2;
        bytes[index++] = (byte)(C2 >> 8);
        bytes[index++] = (byte)(C2 >> 16);
        bytes[index++] = (byte)(C2 >> 24);

        bytes[index++] = (byte)C3;
        bytes[index++] = (byte)(C3 >> 8);
        bytes[index++] = (byte)(C3 >> 16);
        bytes[index++] = (byte)(C3 >> 24);
    
public intprocessBlock(byte[] in, int inOff, byte[] out, int outOff)

        if (WorkingKey == null)
        {
            throw new IllegalStateException("AES engine not initialised");
        }

        if ((inOff + (32 / 2)) > in.length)
        {
            throw new DataLengthException("input buffer too short");
        }

        if ((outOff + (32 / 2)) > out.length)
        {
            throw new DataLengthException("output buffer too short");
        }

        if (forEncryption)
        {
            unpackBlock(in, inOff);
            encryptBlock(WorkingKey);
            packBlock(out, outOff);
        }
        else
        {
            unpackBlock(in, inOff);
            decryptBlock(WorkingKey);
            packBlock(out, outOff);
        }

        return BLOCK_SIZE;
    
public voidreset()

    
private intshift(int r, int shift)


      
             
             
    
        return (r >>> shift) | (r << -shift);
    
private intsubWord(int x)

        return (S[x&255]&255 | ((S[(x>>8)&255]&255)<<8) | ((S[(x>>16)&255]&255)<<16) | S[(x>>24)&255]<<24);
    
private final voidunpackBlock(byte[] bytes, int off)

        int     index = off;

        C0 = (bytes[index++] & 0xff);
        C0 |= (bytes[index++] & 0xff) << 8;
        C0 |= (bytes[index++] & 0xff) << 16;
        C0 |= bytes[index++] << 24;

        C1 = (bytes[index++] & 0xff);
        C1 |= (bytes[index++] & 0xff) << 8;
        C1 |= (bytes[index++] & 0xff) << 16;
        C1 |= bytes[index++] << 24;

        C2 = (bytes[index++] & 0xff);
        C2 |= (bytes[index++] & 0xff) << 8;
        C2 |= (bytes[index++] & 0xff) << 16;
        C2 |= bytes[index++] << 24;

        C3 = (bytes[index++] & 0xff);
        C3 |= (bytes[index++] & 0xff) << 8;
        C3 |= (bytes[index++] & 0xff) << 16;
        C3 |= bytes[index++] << 24;