From 6789b2dc455b90efc9c88886c9366adc9abb7347 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 6 Jul 2005 13:52:27 -0700 Subject: [PADLOCK] Move fast path work into aes_set_key and upper layer Most of the work done in aes_padlock can be done in aes_set_key. This means that we only have to do it once when the key changes rather than every time we perform an encryption or decryption. This patch also sets cra_alignmask to let the upper layer ensure that the buffers fed to us are aligned correctly. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- drivers/crypto/padlock-aes.c | 102 +++++++++++++++++-------------------------- drivers/crypto/padlock.h | 22 +++++----- 2 files changed, 52 insertions(+), 72 deletions(-) (limited to 'drivers') diff --git a/drivers/crypto/padlock-aes.c b/drivers/crypto/padlock-aes.c index ed708b4427b..5f28909d401 100644 --- a/drivers/crypto/padlock-aes.c +++ b/drivers/crypto/padlock-aes.c @@ -49,6 +49,7 @@ #include #include #include +#include #include #include "padlock.h" @@ -59,8 +60,12 @@ #define AES_EXTENDED_KEY_SIZE_B (AES_EXTENDED_KEY_SIZE * sizeof(uint32_t)) struct aes_ctx { - uint32_t e_data[AES_EXTENDED_KEY_SIZE+4]; - uint32_t d_data[AES_EXTENDED_KEY_SIZE+4]; + uint32_t e_data[AES_EXTENDED_KEY_SIZE]; + uint32_t d_data[AES_EXTENDED_KEY_SIZE]; + struct { + struct cword encrypt; + struct cword decrypt; + } cword; uint32_t *E; uint32_t *D; int key_length; @@ -280,10 +285,15 @@ aes_hw_extkey_available(uint8_t key_len) return 0; } +static inline struct aes_ctx *aes_ctx(void *ctx) +{ + return (struct aes_ctx *)ALIGN((unsigned long)ctx, PADLOCK_ALIGNMENT); +} + static int aes_set_key(void *ctx_arg, const uint8_t *in_key, unsigned int key_len, uint32_t *flags) { - struct aes_ctx *ctx = ctx_arg; + struct aes_ctx *ctx = aes_ctx(ctx_arg); uint32_t i, t, u, v, w; uint32_t P[AES_EXTENDED_KEY_SIZE]; uint32_t rounds; @@ -295,25 +305,36 @@ aes_set_key(void *ctx_arg, const uint8_t *in_key, unsigned int key_len, uint32_t ctx->key_length = 
key_len; + /* + * If the hardware is capable of generating the extended key + * itself we must supply the plain key for both encryption + * and decryption. + */ ctx->E = ctx->e_data; - ctx->D = ctx->d_data; - - /* Ensure 16-Bytes alignmentation of keys for VIA PadLock. */ - if ((int)(ctx->e_data) & 0x0F) - ctx->E += 4 - (((int)(ctx->e_data) & 0x0F) / sizeof (ctx->e_data[0])); - - if ((int)(ctx->d_data) & 0x0F) - ctx->D += 4 - (((int)(ctx->d_data) & 0x0F) / sizeof (ctx->d_data[0])); + ctx->D = ctx->e_data; E_KEY[0] = uint32_t_in (in_key); E_KEY[1] = uint32_t_in (in_key + 4); E_KEY[2] = uint32_t_in (in_key + 8); E_KEY[3] = uint32_t_in (in_key + 12); + /* Prepare control words. */ + memset(&ctx->cword, 0, sizeof(ctx->cword)); + + ctx->cword.decrypt.encdec = 1; + ctx->cword.encrypt.rounds = 10 + (key_len - 16) / 4; + ctx->cword.decrypt.rounds = ctx->cword.encrypt.rounds; + ctx->cword.encrypt.ksize = (key_len - 16) / 8; + ctx->cword.decrypt.ksize = ctx->cword.encrypt.ksize; + /* Don't generate extended keys if the hardware can do it. */ if (aes_hw_extkey_available(key_len)) return 0; + ctx->D = ctx->d_data; + ctx->cword.encrypt.keygen = 1; + ctx->cword.decrypt.keygen = 1; + switch (key_len) { case 16: t = E_KEY[3]; @@ -370,9 +391,8 @@ aes_set_key(void *ctx_arg, const uint8_t *in_key, unsigned int key_len, uint32_t /* ====== Encryption/decryption routines ====== */ /* This is the real call to PadLock. */ -static inline void -padlock_xcrypt_ecb(uint8_t *input, uint8_t *output, uint8_t *key, - void *control_word, uint32_t count) +static inline void padlock_xcrypt_ecb(const u8 *input, u8 *output, void *key, + void *control_word, u32 count) { asm volatile ("pushfl; popfl"); /* enforce key reload. 
*/ asm volatile (".byte 0xf3,0x0f,0xa7,0xc8" /* rep xcryptecb */ @@ -380,67 +400,27 @@ padlock_xcrypt_ecb(uint8_t *input, uint8_t *output, uint8_t *key, : "d"(control_word), "b"(key), "c"(count)); } -static void -aes_padlock(void *ctx_arg, uint8_t *out_arg, const uint8_t *in_arg, int encdec) -{ - /* Don't blindly modify this structure - the items must - fit on 16-Bytes boundaries! */ - struct padlock_xcrypt_data { - uint8_t buf[AES_BLOCK_SIZE]; - union cword cword; - }; - - struct aes_ctx *ctx = ctx_arg; - char bigbuf[sizeof(struct padlock_xcrypt_data) + 16]; - struct padlock_xcrypt_data *data; - void *key; - - /* Place 'data' at the first 16-Bytes aligned address in 'bigbuf'. */ - if (((long)bigbuf) & 0x0F) - data = (void*)(bigbuf + 16 - ((long)bigbuf & 0x0F)); - else - data = (void*)bigbuf; - - /* Prepare Control word. */ - memset (data, 0, sizeof(struct padlock_xcrypt_data)); - data->cword.b.encdec = !encdec; /* in the rest of cryptoapi ENC=1/DEC=0 */ - data->cword.b.rounds = 10 + (ctx->key_length - 16) / 4; - data->cword.b.ksize = (ctx->key_length - 16) / 8; - - /* Is the hardware capable to generate the extended key? */ - if (!aes_hw_extkey_available(ctx->key_length)) - data->cword.b.keygen = 1; - - /* ctx->E starts with a plain key - if the hardware is capable - to generate the extended key itself we must supply - the plain key for both Encryption and Decryption. 
*/ - if (encdec == CRYPTO_DIR_ENCRYPT || data->cword.b.keygen == 0) - key = ctx->E; - else - key = ctx->D; - - memcpy(data->buf, in_arg, AES_BLOCK_SIZE); - padlock_xcrypt_ecb(data->buf, data->buf, key, &data->cword, 1); - memcpy(out_arg, data->buf, AES_BLOCK_SIZE); -} - static void aes_encrypt(void *ctx_arg, uint8_t *out, const uint8_t *in) { - aes_padlock(ctx_arg, out, in, CRYPTO_DIR_ENCRYPT); + struct aes_ctx *ctx = aes_ctx(ctx_arg); + padlock_xcrypt_ecb(in, out, ctx->E, &ctx->cword.encrypt, 1); } static void aes_decrypt(void *ctx_arg, uint8_t *out, const uint8_t *in) { - aes_padlock(ctx_arg, out, in, CRYPTO_DIR_DECRYPT); + struct aes_ctx *ctx = aes_ctx(ctx_arg); + padlock_xcrypt_ecb(in, out, ctx->D, &ctx->cword.decrypt, 1); } static struct crypto_alg aes_alg = { .cra_name = "aes", .cra_flags = CRYPTO_ALG_TYPE_CIPHER, .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct aes_ctx), + .cra_ctxsize = sizeof(struct aes_ctx) + + PADLOCK_ALIGNMENT, + .cra_alignmask = PADLOCK_ALIGNMENT - 1, .cra_module = THIS_MODULE, .cra_list = LIST_HEAD_INIT(aes_alg.cra_list), .cra_u = { diff --git a/drivers/crypto/padlock.h b/drivers/crypto/padlock.h index 7a500605e44..3cf2b7a1234 100644 --- a/drivers/crypto/padlock.h +++ b/drivers/crypto/padlock.h @@ -13,18 +13,18 @@ #ifndef _CRYPTO_PADLOCK_H #define _CRYPTO_PADLOCK_H +#define PADLOCK_ALIGNMENT 16 + /* Control word. 
*/ -union cword { - uint32_t cword[4]; - struct { - int rounds:4; - int algo:3; - int keygen:1; - int interm:1; - int encdec:1; - int ksize:2; - } b; -}; +struct cword { + int __attribute__ ((__packed__)) + rounds:4, + algo:3, + keygen:1, + interm:1, + encdec:1, + ksize:2; +} __attribute__ ((__aligned__(PADLOCK_ALIGNMENT))); #define PFX "padlock: " -- cgit v1.2.3-70-g09d2 From 28e8c3ad9464de54a632f00ab3df88fa5f4652d1 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 6 Jul 2005 13:52:43 -0700 Subject: [PADLOCK] Implement multi-block operations By operating on multiple blocks at once, we expect to extract more performance out of the VIA Padlock. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- drivers/crypto/padlock-aes.c | 55 ++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 53 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/crypto/padlock-aes.c b/drivers/crypto/padlock-aes.c index 5f28909d401..d2745ff4699 100644 --- a/drivers/crypto/padlock-aes.c +++ b/drivers/crypto/padlock-aes.c @@ -390,7 +390,7 @@ aes_set_key(void *ctx_arg, const uint8_t *in_key, unsigned int key_len, uint32_t /* ====== Encryption/decryption routines ====== */ -/* This is the real call to PadLock. */ +/* These are the real call to PadLock. */ static inline void padlock_xcrypt_ecb(const u8 *input, u8 *output, void *key, void *control_word, u32 count) { @@ -400,6 +400,17 @@ static inline void padlock_xcrypt_ecb(const u8 *input, u8 *output, void *key, : "d"(control_word), "b"(key), "c"(count)); } +static inline void padlock_xcrypt_cbc(const u8 *input, u8 *output, void *key, + u8 *iv, void *control_word, u32 count) +{ + /* Enforce key reload. 
*/ + asm volatile ("pushfl; popfl"); + /* rep xcryptcbc */ + asm volatile (".byte 0xf3,0x0f,0xa7,0xd0" + : "+S" (input), "+D" (output), "+a" (iv) + : "d" (control_word), "b" (key), "c" (count)); +} + static void aes_encrypt(void *ctx_arg, uint8_t *out, const uint8_t *in) { @@ -414,6 +425,42 @@ aes_decrypt(void *ctx_arg, uint8_t *out, const uint8_t *in) padlock_xcrypt_ecb(in, out, ctx->D, &ctx->cword.decrypt, 1); } +static unsigned int aes_encrypt_ecb(const struct cipher_desc *desc, u8 *out, + const u8 *in, unsigned int nbytes) +{ + struct aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(desc->tfm)); + padlock_xcrypt_ecb(in, out, ctx->E, &ctx->cword.encrypt, + nbytes / AES_BLOCK_SIZE); + return nbytes & ~(AES_BLOCK_SIZE - 1); +} + +static unsigned int aes_decrypt_ecb(const struct cipher_desc *desc, u8 *out, + const u8 *in, unsigned int nbytes) +{ + struct aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(desc->tfm)); + padlock_xcrypt_ecb(in, out, ctx->D, &ctx->cword.decrypt, + nbytes / AES_BLOCK_SIZE); + return nbytes & ~(AES_BLOCK_SIZE - 1); +} + +static unsigned int aes_encrypt_cbc(const struct cipher_desc *desc, u8 *out, + const u8 *in, unsigned int nbytes) +{ + struct aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(desc->tfm)); + padlock_xcrypt_cbc(in, out, ctx->E, desc->info, &ctx->cword.encrypt, + nbytes / AES_BLOCK_SIZE); + return nbytes & ~(AES_BLOCK_SIZE - 1); +} + +static unsigned int aes_decrypt_cbc(const struct cipher_desc *desc, u8 *out, + const u8 *in, unsigned int nbytes) +{ + struct aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(desc->tfm)); + padlock_xcrypt_cbc(in, out, ctx->D, desc->info, &ctx->cword.decrypt, + nbytes / AES_BLOCK_SIZE); + return nbytes & ~(AES_BLOCK_SIZE - 1); +} + static struct crypto_alg aes_alg = { .cra_name = "aes", .cra_flags = CRYPTO_ALG_TYPE_CIPHER, @@ -429,7 +476,11 @@ static struct crypto_alg aes_alg = { .cia_max_keysize = AES_MAX_KEY_SIZE, .cia_setkey = aes_set_key, .cia_encrypt = aes_encrypt, - .cia_decrypt = aes_decrypt + .cia_decrypt = aes_decrypt, + 
.cia_encrypt_ecb = aes_encrypt_ecb, + .cia_decrypt_ecb = aes_decrypt_ecb, + .cia_encrypt_cbc = aes_encrypt_cbc, + .cia_decrypt_cbc = aes_decrypt_cbc, } } }; -- cgit v1.2.3-70-g09d2 From fbdae9f3e7fb57c07cb0d973f113eb25da2e8ff2 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 6 Jul 2005 13:53:29 -0700 Subject: [CRYPTO] Ensure cit_iv is aligned correctly This patch ensures that cit_iv is aligned according to cra_alignmask by allocating it as part of the tfm structure. As a side effect the crypto layer will also guarantee that the tfm ctx area has enough space to be aligned by cra_alignmask. This allows us to remove the extra space reservation from the Padlock driver. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- crypto/api.c | 32 +++++++++++++++++++++++++++++--- crypto/cipher.c | 15 +++++++++------ crypto/internal.h | 28 ++++++++++++++++++++++++++++ drivers/crypto/padlock-aes.c | 3 +-- include/linux/crypto.h | 5 +++++ 5 files changed, 72 insertions(+), 11 deletions(-) (limited to 'drivers') diff --git a/crypto/api.c b/crypto/api.c index 0b583d24f7f..2d8d828c0ca 100644 --- a/crypto/api.c +++ b/crypto/api.c @@ -125,20 +125,46 @@ static void crypto_exit_ops(struct crypto_tfm *tfm) } } +static unsigned int crypto_ctxsize(struct crypto_alg *alg, int flags) +{ + unsigned int len; + + switch (alg->cra_flags & CRYPTO_ALG_TYPE_MASK) { + default: + BUG(); + + case CRYPTO_ALG_TYPE_CIPHER: + len = crypto_cipher_ctxsize(alg, flags); + break; + + case CRYPTO_ALG_TYPE_DIGEST: + len = crypto_digest_ctxsize(alg, flags); + break; + + case CRYPTO_ALG_TYPE_COMPRESS: + len = crypto_compress_ctxsize(alg, flags); + break; + } + + return len + alg->cra_alignmask; +} + struct crypto_tfm *crypto_alloc_tfm(const char *name, u32 flags) { struct crypto_tfm *tfm = NULL; struct crypto_alg *alg; + unsigned int tfm_size; alg = crypto_alg_mod_lookup(name); if (alg == NULL) goto out; - - tfm = kmalloc(sizeof(*tfm) + alg->cra_ctxsize, GFP_KERNEL); + + tfm_size = sizeof(*tfm) + 
crypto_ctxsize(alg, flags); + tfm = kmalloc(tfm_size, GFP_KERNEL); if (tfm == NULL) goto out_put; - memset(tfm, 0, sizeof(*tfm) + alg->cra_ctxsize); + memset(tfm, 0, tfm_size); tfm->__crt_alg = alg; diff --git a/crypto/cipher.c b/crypto/cipher.c index 85eb12f8e56..d3295ce14a5 100644 --- a/crypto/cipher.c +++ b/crypto/cipher.c @@ -41,7 +41,7 @@ static unsigned int crypt_slow(const struct cipher_desc *desc, struct scatter_walk *in, struct scatter_walk *out, unsigned int bsize) { - unsigned int alignmask = desc->tfm->__crt_alg->cra_alignmask; + unsigned int alignmask = crypto_tfm_alg_alignmask(desc->tfm); u8 buffer[bsize * 2 + alignmask]; u8 *src = (u8 *)ALIGN((unsigned long)buffer, alignmask + 1); u8 *dst = src + bsize; @@ -98,7 +98,7 @@ static int crypt(const struct cipher_desc *desc, struct scatter_walk walk_in, walk_out; struct crypto_tfm *tfm = desc->tfm; const unsigned int bsize = crypto_tfm_alg_blocksize(tfm); - unsigned int alignmask = tfm->__crt_alg->cra_alignmask; + unsigned int alignmask = crypto_tfm_alg_alignmask(tfm); unsigned long buffer = 0; if (!nbytes) @@ -399,6 +399,8 @@ int crypto_init_cipher_ops(struct crypto_tfm *tfm) } if (ops->cit_mode == CRYPTO_TFM_MODE_CBC) { + unsigned int align; + unsigned long addr; switch (crypto_tfm_alg_blocksize(tfm)) { case 8: @@ -418,9 +420,11 @@ int crypto_init_cipher_ops(struct crypto_tfm *tfm) } ops->cit_ivsize = crypto_tfm_alg_blocksize(tfm); - ops->cit_iv = kmalloc(ops->cit_ivsize, GFP_KERNEL); - if (ops->cit_iv == NULL) - ret = -ENOMEM; + align = crypto_tfm_alg_alignmask(tfm) + 1; + addr = (unsigned long)crypto_tfm_ctx(tfm); + addr = ALIGN(addr, align); + addr += ALIGN(tfm->__crt_alg->cra_ctxsize, align); + ops->cit_iv = (void *)addr; } out: @@ -429,5 +433,4 @@ out: void crypto_exit_cipher_ops(struct crypto_tfm *tfm) { - kfree(tfm->crt_cipher.cit_iv); } diff --git a/crypto/internal.h b/crypto/internal.h index 83b1b6d6d92..68612874b5f 100644 --- a/crypto/internal.h +++ b/crypto/internal.h @@ -16,6 +16,7 @@ 
#include #include #include +#include #include extern enum km_type crypto_km_types[]; @@ -61,6 +62,33 @@ static inline void crypto_init_proc(void) { } #endif +static inline unsigned int crypto_digest_ctxsize(struct crypto_alg *alg, + int flags) +{ + return alg->cra_ctxsize; +} + +static inline unsigned int crypto_cipher_ctxsize(struct crypto_alg *alg, + int flags) +{ + unsigned int len = alg->cra_ctxsize; + + switch (flags & CRYPTO_TFM_MODE_MASK) { + case CRYPTO_TFM_MODE_CBC: + len = ALIGN(len, alg->cra_alignmask + 1); + len += alg->cra_blocksize; + break; + } + + return len; +} + +static inline unsigned int crypto_compress_ctxsize(struct crypto_alg *alg, + int flags) +{ + return alg->cra_ctxsize; +} + int crypto_init_digest_flags(struct crypto_tfm *tfm, u32 flags); int crypto_init_cipher_flags(struct crypto_tfm *tfm, u32 flags); int crypto_init_compress_flags(struct crypto_tfm *tfm, u32 flags); diff --git a/drivers/crypto/padlock-aes.c b/drivers/crypto/padlock-aes.c index d2745ff4699..c5b58fae95f 100644 --- a/drivers/crypto/padlock-aes.c +++ b/drivers/crypto/padlock-aes.c @@ -465,8 +465,7 @@ static struct crypto_alg aes_alg = { .cra_name = "aes", .cra_flags = CRYPTO_ALG_TYPE_CIPHER, .cra_blocksize = AES_BLOCK_SIZE, - .cra_ctxsize = sizeof(struct aes_ctx) + - PADLOCK_ALIGNMENT, + .cra_ctxsize = sizeof(struct aes_ctx), .cra_alignmask = PADLOCK_ALIGNMENT - 1, .cra_module = THIS_MODULE, .cra_list = LIST_HEAD_INIT(aes_alg.cra_list), diff --git a/include/linux/crypto.h b/include/linux/crypto.h index ac9d49beecd..5e2bcc636a0 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -288,6 +288,11 @@ static inline unsigned int crypto_tfm_alg_digestsize(struct crypto_tfm *tfm) return tfm->__crt_alg->cra_digest.dia_digestsize; } +static inline unsigned int crypto_tfm_alg_alignmask(struct crypto_tfm *tfm) +{ + return tfm->__crt_alg->cra_alignmask; +} + static inline void *crypto_tfm_ctx(struct crypto_tfm *tfm) { return (void *)&tfm[1]; -- cgit v1.2.3-70-g09d2 From 
476df259cd577e20379b02a7f7ffd086ea925a83 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 6 Jul 2005 13:54:09 -0700 Subject: [CRYPTO] Update IV correctly for Padlock CBC encryption When the Padlock does CBC encryption, the memory pointed to by EAX is not updated at all. Instead, it updates the value of EAX by pointing it to the last block in the output. Therefore to maintain the correct semantics we need to copy the IV. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- drivers/crypto/padlock-aes.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'drivers') diff --git a/drivers/crypto/padlock-aes.c b/drivers/crypto/padlock-aes.c index c5b58fae95f..71407c578af 100644 --- a/drivers/crypto/padlock-aes.c +++ b/drivers/crypto/padlock-aes.c @@ -400,8 +400,8 @@ static inline void padlock_xcrypt_ecb(const u8 *input, u8 *output, void *key, : "d"(control_word), "b"(key), "c"(count)); } -static inline void padlock_xcrypt_cbc(const u8 *input, u8 *output, void *key, - u8 *iv, void *control_word, u32 count) +static inline u8 *padlock_xcrypt_cbc(const u8 *input, u8 *output, void *key, + u8 *iv, void *control_word, u32 count) { /* Enforce key reload. 
*/ asm volatile ("pushfl; popfl"); @@ -409,6 +409,7 @@ static inline void padlock_xcrypt_cbc(const u8 *input, u8 *output, void *key, asm volatile (".byte 0xf3,0x0f,0xa7,0xd0" : "+S" (input), "+D" (output), "+a" (iv) : "d" (control_word), "b" (key), "c" (count)); + return iv; } static void @@ -447,8 +448,12 @@ static unsigned int aes_encrypt_cbc(const struct cipher_desc *desc, u8 *out, const u8 *in, unsigned int nbytes) { struct aes_ctx *ctx = aes_ctx(crypto_tfm_ctx(desc->tfm)); - padlock_xcrypt_cbc(in, out, ctx->E, desc->info, &ctx->cword.encrypt, - nbytes / AES_BLOCK_SIZE); + u8 *iv; + + iv = padlock_xcrypt_cbc(in, out, ctx->E, desc->info, + &ctx->cword.encrypt, nbytes / AES_BLOCK_SIZE); + memcpy(desc->info, iv, AES_BLOCK_SIZE); + return nbytes & ~(AES_BLOCK_SIZE - 1); } -- cgit v1.2.3-70-g09d2