From 21a3d3b234cd170deded6beb42409f2a9cd01917 Mon Sep 17 00:00:00 2001 From: Tadeusz Struk Date: Tue, 17 May 2016 10:53:51 -0700 Subject: crypto: qat - fix typos sizeof for ctx The sizeof(*ctx->dec_cd) and sizeof(*ctx->enc_cd) are equal, but we should use the correct one for freeing memory anyway. Signed-off-by: Tadeusz Struk Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_common/qat_algs.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/crypto') diff --git a/drivers/crypto/qat/qat_common/qat_algs.c b/drivers/crypto/qat/qat_common/qat_algs.c index 1e8852a8a057..769148dbaeb3 100644 --- a/drivers/crypto/qat/qat_common/qat_algs.c +++ b/drivers/crypto/qat/qat_common/qat_algs.c @@ -947,13 +947,13 @@ static int qat_alg_ablkcipher_setkey(struct crypto_ablkcipher *tfm, return 0; out_free_all: - memset(ctx->dec_cd, 0, sizeof(*ctx->enc_cd)); - dma_free_coherent(dev, sizeof(*ctx->enc_cd), + memset(ctx->dec_cd, 0, sizeof(*ctx->dec_cd)); + dma_free_coherent(dev, sizeof(*ctx->dec_cd), ctx->dec_cd, ctx->dec_cd_paddr); ctx->dec_cd = NULL; out_free_enc: - memset(ctx->enc_cd, 0, sizeof(*ctx->dec_cd)); - dma_free_coherent(dev, sizeof(*ctx->dec_cd), + memset(ctx->enc_cd, 0, sizeof(*ctx->enc_cd)); + dma_free_coherent(dev, sizeof(*ctx->enc_cd), ctx->enc_cd, ctx->enc_cd_paddr); ctx->enc_cd = NULL; return -ENOMEM; -- cgit v1.2.3 From bd52f1c23255a7c355268215c3c75aabbe11a67a Mon Sep 17 00:00:00 2001 From: Cristian Stoica Date: Thu, 19 May 2016 18:11:18 +0300 Subject: crypto: caam - fix offset field in hw sg entries MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The offset field is 13 bits wide; make sure we don't overwrite more than that in the caam hardware scatter gather structure. Signed-off-by: Cristian Stoica Signed-off-by: Horia Geantă Signed-off-by: Herbert Xu --- drivers/crypto/caam/desc.h | 2 +- drivers/crypto/caam/sg_sw_sec4.h | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'drivers/crypto') diff --git a/drivers/crypto/caam/desc.h b/drivers/crypto/caam/desc.h index 1e93c6af2275..fe30ff69088c 100644 --- a/drivers/crypto/caam/desc.h +++ b/drivers/crypto/caam/desc.h @@ -20,7 +20,7 @@ #define SEC4_SG_BPID_MASK 0x000000ff #define SEC4_SG_BPID_SHIFT 16 #define SEC4_SG_LEN_MASK 0x3fffffff /* Excludes EXT and FINAL */ -#define SEC4_SG_OFFS_MASK 0x00001fff +#define SEC4_SG_OFFSET_MASK 0x00001fff struct sec4_sg_entry { #ifdef CONFIG_CRYPTO_DEV_FSL_CAAM_IMX diff --git a/drivers/crypto/caam/sg_sw_sec4.h b/drivers/crypto/caam/sg_sw_sec4.h index 12ec6616e89d..2311341b7356 100644 --- a/drivers/crypto/caam/sg_sw_sec4.h +++ b/drivers/crypto/caam/sg_sw_sec4.h @@ -11,12 +11,12 @@ struct sec4_sg_entry; * convert single dma address to h/w link table format */ static inline void dma_to_sec4_sg_one(struct sec4_sg_entry *sec4_sg_ptr, - dma_addr_t dma, u32 len, u32 offset) + dma_addr_t dma, u32 len, u16 offset) { sec4_sg_ptr->ptr = dma; sec4_sg_ptr->len = len; sec4_sg_ptr->buf_pool_id = 0; - sec4_sg_ptr->offset = offset; + sec4_sg_ptr->offset = offset & SEC4_SG_OFFSET_MASK; #ifdef DEBUG print_hex_dump(KERN_ERR, "sec4_sg_ptr@: ", DUMP_PREFIX_ADDRESS, 16, 4, sec4_sg_ptr, @@ -30,7 +30,7 @@ static inline void dma_to_sec4_sg_one(struct sec4_sg_entry *sec4_sg_ptr, */ static inline struct sec4_sg_entry * sg_to_sec4_sg(struct scatterlist *sg, int sg_count, - struct sec4_sg_entry *sec4_sg_ptr, u32 offset) + struct sec4_sg_entry *sec4_sg_ptr, u16 offset) { while (sg_count) { dma_to_sec4_sg_one(sec4_sg_ptr, sg_dma_address(sg), @@ -48,7 +48,7 @@ sg_to_sec4_sg(struct scatterlist *sg, int sg_count, */ static inline void sg_to_sec4_sg_last(struct scatterlist *sg, int sg_count, struct sec4_sg_entry *sec4_sg_ptr, - u32 offset) + u16 offset) { sec4_sg_ptr = sg_to_sec4_sg(sg, sg_count, sec4_sg_ptr, offset); sec4_sg_ptr->len |= SEC4_SG_LEN_FIN; -- cgit v1.2.3 From 261ea058f016bc04fa064348ad9bf39d94379381 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Horia=20Geant=C4=83?= Date: Thu, 19 May 2016 18:11:26 +0300 Subject: crypto: caam - handle core endianness != caam endianness MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are SoCs like LS1043A where CAAM endianness (BE) does not match the default endianness of the core (LE). Moreover, there are requirements for the driver to handle cases like CPU_BIG_ENDIAN=y on ARM-based SoCs. This requires for a complete rewrite of the I/O accessors. PPC-specific accessors - {in,out}_{le,be}XX - are replaced with generic ones - io{read,write}[be]XX. Endianness is detected dynamically (at runtime) to allow for multiplatform kernels, for e.g. running the same kernel image on LS1043A (BE CAAM) and LS2080A (LE CAAM) armv8-based SoCs. While here: debugfs entries need to take into consideration the endianness of the core when displaying data. Add the necessary glue code so the entries remain the same, but they are properly read, regardless of the core and/or SEC endianness. Note: pdb.h fixes only what is currently being used (IPsec). Reviewed-by: Tudor Ambarus Signed-off-by: Horia Geantă Signed-off-by: Alex Porosanu Signed-off-by: Herbert Xu --- drivers/crypto/caam/Kconfig | 4 - drivers/crypto/caam/caamhash.c | 5 +- drivers/crypto/caam/ctrl.c | 125 +++++++++++++++++++------------ drivers/crypto/caam/desc.h | 7 +- drivers/crypto/caam/desc_constr.h | 44 +++++++---- drivers/crypto/caam/jr.c | 22 +++--- drivers/crypto/caam/pdb.h | 137 ++++++++++++++++++++++++++-------- drivers/crypto/caam/regs.h | 151 +++++++++++++++++++++++++------------- drivers/crypto/caam/sg_sw_sec4.h | 11 +-- 9 files changed, 340 insertions(+), 166 deletions(-) (limited to 'drivers/crypto') diff --git a/drivers/crypto/caam/Kconfig b/drivers/crypto/caam/Kconfig index 5652a53415dc..d2c2909a4020 100644 --- a/drivers/crypto/caam/Kconfig +++ b/drivers/crypto/caam/Kconfig @@ -116,10 +116,6 @@ config CRYPTO_DEV_FSL_CAAM_IMX def_bool SOC_IMX6 || SOC_IMX7D depends on CRYPTO_DEV_FSL_CAAM -config CRYPTO_DEV_FSL_CAAM_LE - def_bool CRYPTO_DEV_FSL_CAAM_IMX || SOC_LS1021A - depends on CRYPTO_DEV_FSL_CAAM - config CRYPTO_DEV_FSL_CAAM_DEBUG bool "Enable debug output in CAAM driver" depends on CRYPTO_DEV_FSL_CAAM diff --git a/drivers/crypto/caam/caamhash.c b/drivers/crypto/caam/caamhash.c index 5845d4a08797..f1ecc8df8d41 100644 --- a/drivers/crypto/caam/caamhash.c +++ b/drivers/crypto/caam/caamhash.c @@ -847,7 +847,7 @@ static int ahash_update_ctx(struct ahash_request *req) *next_buflen, 0); } else { (edesc->sec4_sg + sec4_sg_src_index - 1)->len |= - SEC4_SG_LEN_FIN; + cpu_to_caam32(SEC4_SG_LEN_FIN); } state->current_buf = !state->current_buf; @@ -949,7 +949,8 @@ static int ahash_final_ctx(struct ahash_request *req) state->buf_dma = try_buf_map_to_sec4_sg(jrdev, edesc->sec4_sg + 1, buf, state->buf_dma, buflen, last_buflen); - (edesc->sec4_sg + sec4_sg_src_index - 1)->len |= SEC4_SG_LEN_FIN; + (edesc->sec4_sg + sec4_sg_src_index - 1)->len |= + cpu_to_caam32(SEC4_SG_LEN_FIN); edesc->sec4_sg_dma = dma_map_single(jrdev, edesc->sec4_sg, sec4_sg_bytes, DMA_TO_DEVICE); diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c index 5ad5f3009ae0..0ec112ee5204 100644 --- a/drivers/crypto/caam/ctrl.c +++ b/drivers/crypto/caam/ctrl.c @@ -15,6 +15,9 @@ #include "desc_constr.h" #include "error.h" +bool caam_little_end; +EXPORT_SYMBOL(caam_little_end); + /* * i.MX targets tend to have clock control subsystems that can * enable/disable clocking to our device. @@ -106,7 +109,7 @@ static inline int run_descriptor_deco0(struct device *ctrldev, u32 *desc, if (ctrlpriv->virt_en == 1) { - setbits32(&ctrl->deco_rsr, DECORSR_JR0); + clrsetbits_32(&ctrl->deco_rsr, 0, DECORSR_JR0); while (!(rd_reg32(&ctrl->deco_rsr) & DECORSR_VALID) && --timeout) @@ -115,7 +118,7 @@ static inline int run_descriptor_deco0(struct device *ctrldev, u32 *desc, timeout = 100000; } - setbits32(&ctrl->deco_rq, DECORR_RQD0ENABLE); + clrsetbits_32(&ctrl->deco_rq, 0, DECORR_RQD0ENABLE); while (!(rd_reg32(&ctrl->deco_rq) & DECORR_DEN0) && --timeout) @@ -123,12 +126,12 @@ static inline int run_descriptor_deco0(struct device *ctrldev, u32 *desc, if (!timeout) { dev_err(ctrldev, "failed to acquire DECO 0\n"); - clrbits32(&ctrl->deco_rq, DECORR_RQD0ENABLE); + clrsetbits_32(&ctrl->deco_rq, DECORR_RQD0ENABLE, 0); return -ENODEV; } for (i = 0; i < desc_len(desc); i++) - wr_reg32(&deco->descbuf[i], *(desc + i)); + wr_reg32(&deco->descbuf[i], caam32_to_cpu(*(desc + i))); flags = DECO_JQCR_WHL; /* @@ -139,7 +142,7 @@ static inline int run_descriptor_deco0(struct device *ctrldev, u32 *desc, flags |= DECO_JQCR_FOUR; /* Instruct the DECO to execute it */ - setbits32(&deco->jr_ctl_hi, flags); + clrsetbits_32(&deco->jr_ctl_hi, 0, flags); timeout = 10000000; do { @@ -158,10 +161,10 @@ static inline int run_descriptor_deco0(struct device *ctrldev, u32 *desc, DECO_OP_STATUS_HI_ERR_MASK; if (ctrlpriv->virt_en == 1) - clrbits32(&ctrl->deco_rsr, DECORSR_JR0); + clrsetbits_32(&ctrl->deco_rsr, DECORSR_JR0, 0); /* Mark the DECO as free */ - clrbits32(&ctrl->deco_rq, DECORR_RQD0ENABLE); + clrsetbits_32(&ctrl->deco_rq, DECORR_RQD0ENABLE, 0); if (!timeout) return -EAGAIN; @@ -349,7 +352,7 @@ static void kick_trng(struct platform_device *pdev, int ent_delay) r4tst = &ctrl->r4tst[0]; /* put RNG4 into program mode */ - setbits32(&r4tst->rtmctl, RTMCTL_PRGM); + clrsetbits_32(&r4tst->rtmctl, 0, RTMCTL_PRGM); /* * Performance-wise, it does not make sense to @@ -363,7 +366,7 @@ static void kick_trng(struct platform_device *pdev, int ent_delay) >> RTSDCTL_ENT_DLY_SHIFT; if (ent_delay <= val) { /* put RNG4 into run mode */ - clrbits32(&r4tst->rtmctl, RTMCTL_PRGM); + clrsetbits_32(&r4tst->rtmctl, RTMCTL_PRGM, 0); return; } @@ -381,9 +384,9 @@ static void kick_trng(struct platform_device *pdev, int ent_delay) * select raw sampling in both entropy shifter * and statistical checker */ - setbits32(&val, RTMCTL_SAMP_MODE_RAW_ES_SC); + clrsetbits_32(&val, 0, RTMCTL_SAMP_MODE_RAW_ES_SC); /* put RNG4 into run mode */ - clrbits32(&val, RTMCTL_PRGM); + clrsetbits_32(&val, RTMCTL_PRGM, 0); /* write back the control register */ wr_reg32(&r4tst->rtmctl, val); } @@ -406,6 +409,23 @@ int caam_get_era(void) } EXPORT_SYMBOL(caam_get_era); +#ifdef CONFIG_DEBUG_FS +static int caam_debugfs_u64_get(void *data, u64 *val) +{ + *val = caam64_to_cpu(*(u64 *)data); + return 0; +} + +static int caam_debugfs_u32_get(void *data, u64 *val) +{ + *val = caam32_to_cpu(*(u32 *)data); + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(caam_fops_u32_ro, caam_debugfs_u32_get, NULL, "%llu\n"); +DEFINE_SIMPLE_ATTRIBUTE(caam_fops_u64_ro, caam_debugfs_u64_get, NULL, "%llu\n"); +#endif + /* Probe routine for CAAM top (controller) level */ static int caam_probe(struct platform_device *pdev) { @@ -504,6 +524,10 @@ static int caam_probe(struct platform_device *pdev) ret = -ENOMEM; goto disable_caam_emi_slow; } + + caam_little_end = !(bool)(rd_reg32(&ctrl->perfmon.status) & + (CSTA_PLEND | CSTA_ALT_PLEND)); + /* Finding the page size for using the CTPR_MS register */ comp_params = rd_reg32(&ctrl->perfmon.comp_parms_ms); pg_size = (comp_params & CTPR_MS_PG_SZ_MASK) >> CTPR_MS_PG_SZ_SHIFT; @@ -559,9 +583,9 @@ static int caam_probe(struct platform_device *pdev) } if (ctrlpriv->virt_en == 1) - setbits32(&ctrl->jrstart, JRSTART_JR0_START | - JRSTART_JR1_START | JRSTART_JR2_START | - JRSTART_JR3_START); + clrsetbits_32(&ctrl->jrstart, 0, JRSTART_JR0_START | + JRSTART_JR1_START | JRSTART_JR2_START | + JRSTART_JR3_START); if (sizeof(dma_addr_t) == sizeof(u64)) if (of_device_is_compatible(nprop, "fsl,sec-v5.0")) @@ -693,7 +717,7 @@ static int caam_probe(struct platform_device *pdev) ctrlpriv->rng4_sh_init = ~ctrlpriv->rng4_sh_init & RDSTA_IFMASK; /* Enable RDB bit so that RNG works faster */ - setbits32(&ctrl->scfgr, SCFGR_RDBENABLE); + clrsetbits_32(&ctrl->scfgr, 0, SCFGR_RDBENABLE); } /* NOTE: RTIC detection ought to go here, around Si time */ @@ -719,48 +743,59 @@ static int caam_probe(struct platform_device *pdev) ctrlpriv->ctl = debugfs_create_dir("ctl", ctrlpriv->dfs_root); /* Controller-level - performance monitor counters */ + ctrlpriv->ctl_rq_dequeued = - debugfs_create_u64("rq_dequeued", - S_IRUSR | S_IRGRP | S_IROTH, - ctrlpriv->ctl, &perfmon->req_dequeued); + debugfs_create_file("rq_dequeued", + S_IRUSR | S_IRGRP | S_IROTH, + ctrlpriv->ctl, &perfmon->req_dequeued, + &caam_fops_u64_ro); ctrlpriv->ctl_ob_enc_req = - debugfs_create_u64("ob_rq_encrypted", - S_IRUSR | S_IRGRP | S_IROTH, - ctrlpriv->ctl, &perfmon->ob_enc_req); + debugfs_create_file("ob_rq_encrypted", + S_IRUSR | S_IRGRP | S_IROTH, + ctrlpriv->ctl, &perfmon->ob_enc_req, + &caam_fops_u64_ro); ctrlpriv->ctl_ib_dec_req = - debugfs_create_u64("ib_rq_decrypted", - S_IRUSR | S_IRGRP | S_IROTH, - ctrlpriv->ctl, &perfmon->ib_dec_req); + debugfs_create_file("ib_rq_decrypted", + S_IRUSR | S_IRGRP | S_IROTH, + ctrlpriv->ctl, &perfmon->ib_dec_req, + &caam_fops_u64_ro); ctrlpriv->ctl_ob_enc_bytes = - debugfs_create_u64("ob_bytes_encrypted", - S_IRUSR | S_IRGRP | S_IROTH, - ctrlpriv->ctl, &perfmon->ob_enc_bytes); + debugfs_create_file("ob_bytes_encrypted", + S_IRUSR | S_IRGRP | S_IROTH, + ctrlpriv->ctl, &perfmon->ob_enc_bytes, + &caam_fops_u64_ro); ctrlpriv->ctl_ob_prot_bytes = - debugfs_create_u64("ob_bytes_protected", - S_IRUSR | S_IRGRP | S_IROTH, - ctrlpriv->ctl, &perfmon->ob_prot_bytes); + debugfs_create_file("ob_bytes_protected", + S_IRUSR | S_IRGRP | S_IROTH, + ctrlpriv->ctl, &perfmon->ob_prot_bytes, + &caam_fops_u64_ro); ctrlpriv->ctl_ib_dec_bytes = - debugfs_create_u64("ib_bytes_decrypted", - S_IRUSR | S_IRGRP | S_IROTH, - ctrlpriv->ctl, &perfmon->ib_dec_bytes); + debugfs_create_file("ib_bytes_decrypted", + S_IRUSR | S_IRGRP | S_IROTH, + ctrlpriv->ctl, &perfmon->ib_dec_bytes, + &caam_fops_u64_ro); ctrlpriv->ctl_ib_valid_bytes = - debugfs_create_u64("ib_bytes_validated", - S_IRUSR | S_IRGRP | S_IROTH, - ctrlpriv->ctl, &perfmon->ib_valid_bytes); + debugfs_create_file("ib_bytes_validated", + S_IRUSR | S_IRGRP | S_IROTH, + ctrlpriv->ctl, &perfmon->ib_valid_bytes, + &caam_fops_u64_ro); /* Controller level - global status values */ ctrlpriv->ctl_faultaddr = - debugfs_create_u64("fault_addr", - S_IRUSR | S_IRGRP | S_IROTH, - ctrlpriv->ctl, &perfmon->faultaddr); + debugfs_create_file("fault_addr", + S_IRUSR | S_IRGRP | S_IROTH, + ctrlpriv->ctl, &perfmon->faultaddr, + &caam_fops_u32_ro); ctrlpriv->ctl_faultdetail = - debugfs_create_u32("fault_detail", - S_IRUSR | S_IRGRP | S_IROTH, - ctrlpriv->ctl, &perfmon->faultdetail); + debugfs_create_file("fault_detail", + S_IRUSR | S_IRGRP | S_IROTH, + ctrlpriv->ctl, &perfmon->faultdetail, + &caam_fops_u32_ro); ctrlpriv->ctl_faultstatus = - debugfs_create_u32("fault_status", - S_IRUSR | S_IRGRP | S_IROTH, - ctrlpriv->ctl, &perfmon->status); + debugfs_create_file("fault_status", + S_IRUSR | S_IRGRP | S_IROTH, + ctrlpriv->ctl, &perfmon->status, + &caam_fops_u32_ro); /* Internal covering keys (useful in non-secure mode only) */ ctrlpriv->ctl_kek_wrap.data = &ctrlpriv->ctrl->kek[0]; diff --git a/drivers/crypto/caam/desc.h b/drivers/crypto/caam/desc.h index fe30ff69088c..d8d5584b600b 100644 --- a/drivers/crypto/caam/desc.h +++ b/drivers/crypto/caam/desc.h @@ -23,16 +23,15 @@ #define SEC4_SG_OFFSET_MASK 0x00001fff struct sec4_sg_entry { -#ifdef CONFIG_CRYPTO_DEV_FSL_CAAM_IMX +#if !defined(CONFIG_ARCH_DMA_ADDR_T_64BIT) && \ + defined(CONFIG_CRYPTO_DEV_FSL_CAAM_IMX) u32 rsvd1; dma_addr_t ptr; #else u64 ptr; #endif /* CONFIG_CRYPTO_DEV_FSL_CAAM_IMX */ u32 len; - u8 rsvd2; - u8 buf_pool_id; - u16 offset; + u32 bpid_offset; }; /* Max size of any CAAM descriptor in 32-bit words, inclusive of header */ diff --git a/drivers/crypto/caam/desc_constr.h b/drivers/crypto/caam/desc_constr.h index 98d07de24fc4..ae3aef6e9fee 100644 --- a/drivers/crypto/caam/desc_constr.h +++ b/drivers/crypto/caam/desc_constr.h @@ -5,6 +5,7 @@ */ #include "desc.h" +#include "regs.h" #define IMMEDIATE (1 << 23) #define CAAM_CMD_SZ sizeof(u32) @@ -30,9 +31,11 @@ LDST_SRCDST_WORD_DECOCTRL | \ (LDOFF_ENABLE_AUTO_NFIFO << LDST_OFFSET_SHIFT)) +extern bool caam_little_end; + static inline int desc_len(u32 *desc) { - return *desc & HDR_DESCLEN_MASK; + return caam32_to_cpu(*desc) & HDR_DESCLEN_MASK; } static inline int desc_bytes(void *desc) @@ -52,7 +55,7 @@ static inline void *sh_desc_pdb(u32 *desc) static inline void init_desc(u32 *desc, u32 options) { - *desc = (options | HDR_ONE) + 1; + *desc = cpu_to_caam32((options | HDR_ONE) + 1); } static inline void init_sh_desc(u32 *desc, u32 options) @@ -78,9 +81,10 @@ static inline void append_ptr(u32 *desc, dma_addr_t ptr) { dma_addr_t *offset = (dma_addr_t *)desc_end(desc); - *offset = ptr; + *offset = cpu_to_caam_dma(ptr); - (*desc) += CAAM_PTR_SZ / CAAM_CMD_SZ; + (*desc) = cpu_to_caam32(caam32_to_cpu(*desc) + + CAAM_PTR_SZ / CAAM_CMD_SZ); } static inline void init_job_desc_shared(u32 *desc, dma_addr_t ptr, int len, @@ -99,16 +103,17 @@ static inline void append_data(u32 *desc, void *data, int len) if (len) /* avoid sparse warning: memcpy with byte count of 0 */ memcpy(offset, data, len); - (*desc) += (len + CAAM_CMD_SZ - 1) / CAAM_CMD_SZ; + (*desc) = cpu_to_caam32(caam32_to_cpu(*desc) + + (len + CAAM_CMD_SZ - 1) / CAAM_CMD_SZ); } static inline void append_cmd(u32 *desc, u32 command) { u32 *cmd = desc_end(desc); - *cmd = command; + *cmd = cpu_to_caam32(command); - (*desc)++; + (*desc) = cpu_to_caam32(caam32_to_cpu(*desc) + 1); } #define append_u32 append_cmd @@ -117,16 +122,22 @@ static inline void append_u64(u32 *desc, u64 data) { u32 *offset = desc_end(desc); - *offset = upper_32_bits(data); - *(++offset) = lower_32_bits(data); + /* Only 32-bit alignment is guaranteed in descriptor buffer */ + if (caam_little_end) { + *offset = cpu_to_caam32(lower_32_bits(data)); + *(++offset) = cpu_to_caam32(upper_32_bits(data)); + } else { + *offset = cpu_to_caam32(upper_32_bits(data)); + *(++offset) = cpu_to_caam32(lower_32_bits(data)); + } - (*desc) += 2; + (*desc) = cpu_to_caam32(caam32_to_cpu(*desc) + 2); } /* Write command without affecting header, and return pointer to next word */ static inline u32 *write_cmd(u32 *desc, u32 command) { - *desc = command; + *desc = cpu_to_caam32(command); return desc + 1; } @@ -168,14 +179,17 @@ APPEND_CMD_RET(move, MOVE) static inline void set_jump_tgt_here(u32 *desc, u32 *jump_cmd) { - *jump_cmd = *jump_cmd | (desc_len(desc) - (jump_cmd - desc)); + *jump_cmd = cpu_to_caam32(caam32_to_cpu(*jump_cmd) | + (desc_len(desc) - (jump_cmd - desc))); } static inline void set_move_tgt_here(u32 *desc, u32 *move_cmd) { - *move_cmd &= ~MOVE_OFFSET_MASK; - *move_cmd = *move_cmd | ((desc_len(desc) << (MOVE_OFFSET_SHIFT + 2)) & - MOVE_OFFSET_MASK); + u32 val = caam32_to_cpu(*move_cmd); + + val &= ~MOVE_OFFSET_MASK; + val |= (desc_len(desc) << (MOVE_OFFSET_SHIFT + 2)) & MOVE_OFFSET_MASK; + *move_cmd = cpu_to_caam32(val); } #define APPEND_CMD(cmd, op) \ diff --git a/drivers/crypto/caam/jr.c b/drivers/crypto/caam/jr.c index 5ef4be22eb80..a81f551ac222 100644 --- a/drivers/crypto/caam/jr.c +++ b/drivers/crypto/caam/jr.c @@ -31,7 +31,7 @@ static int caam_reset_hw_jr(struct device *dev) * mask interrupts since we are going to poll * for reset completion status */ - setbits32(&jrp->rregs->rconfig_lo, JRCFG_IMSK); + clrsetbits_32(&jrp->rregs->rconfig_lo, 0, JRCFG_IMSK); /* initiate flush (required prior to reset) */ wr_reg32(&jrp->rregs->jrcommand, JRCR_RESET); @@ -57,7 +57,7 @@ static int caam_reset_hw_jr(struct device *dev) } /* unmask interrupts */ - clrbits32(&jrp->rregs->rconfig_lo, JRCFG_IMSK); + clrsetbits_32(&jrp->rregs->rconfig_lo, JRCFG_IMSK, 0); return 0; } @@ -147,7 +147,7 @@ static irqreturn_t caam_jr_interrupt(int irq, void *st_dev) } /* mask valid interrupts */ - setbits32(&jrp->rregs->rconfig_lo, JRCFG_IMSK); + clrsetbits_32(&jrp->rregs->rconfig_lo, 0, JRCFG_IMSK); /* Have valid interrupt at this point, just ACK and trigger */ wr_reg32(&jrp->rregs->jrintstatus, irqstate); @@ -182,7 +182,7 @@ static void caam_jr_dequeue(unsigned long devarg) sw_idx = (tail + i) & (JOBR_DEPTH - 1); if (jrp->outring[hw_idx].desc == - jrp->entinfo[sw_idx].desc_addr_dma) + caam_dma_to_cpu(jrp->entinfo[sw_idx].desc_addr_dma)) break; /* found */ } /* we should never fail to find a matching descriptor */ @@ -200,7 +200,7 @@ static void caam_jr_dequeue(unsigned long devarg) usercall = jrp->entinfo[sw_idx].callbk; userarg = jrp->entinfo[sw_idx].cbkarg; userdesc = jrp->entinfo[sw_idx].desc_addr_virt; - userstatus = jrp->outring[hw_idx].jrstatus; + userstatus = caam32_to_cpu(jrp->outring[hw_idx].jrstatus); /* * Make sure all information from the job has been obtained @@ -236,7 +236,7 @@ static void caam_jr_dequeue(unsigned long devarg) } /* reenable / unmask IRQs */ - clrbits32(&jrp->rregs->rconfig_lo, JRCFG_IMSK); + clrsetbits_32(&jrp->rregs->rconfig_lo, JRCFG_IMSK, 0); } /** @@ -330,7 +330,7 @@ int caam_jr_enqueue(struct device *dev, u32 *desc, int head, tail, desc_size; dma_addr_t desc_dma; - desc_size = (*desc & HDR_JD_LENGTH_MASK) * sizeof(u32); + desc_size = (caam32_to_cpu(*desc) & HDR_JD_LENGTH_MASK) * sizeof(u32); desc_dma = dma_map_single(dev, desc, desc_size, DMA_TO_DEVICE); if (dma_mapping_error(dev, desc_dma)) { dev_err(dev, "caam_jr_enqueue(): can't map jobdesc\n"); @@ -356,7 +356,7 @@ int caam_jr_enqueue(struct device *dev, u32 *desc, head_entry->cbkarg = areq; head_entry->desc_addr_dma = desc_dma; - jrp->inpring[jrp->inp_ring_write_index] = desc_dma; + jrp->inpring[jrp->inp_ring_write_index] = cpu_to_caam_dma(desc_dma); /* * Guarantee that the descriptor's DMA address has been written to @@ -444,9 +444,9 @@ static int caam_jr_init(struct device *dev) spin_lock_init(&jrp->outlock); /* Select interrupt coalescing parameters */ - setbits32(&jrp->rregs->rconfig_lo, JOBR_INTC | - (JOBR_INTC_COUNT_THLD << JRCFG_ICDCT_SHIFT) | - (JOBR_INTC_TIME_THLD << JRCFG_ICTT_SHIFT)); + clrsetbits_32(&jrp->rregs->rconfig_lo, 0, JOBR_INTC | + (JOBR_INTC_COUNT_THLD << JRCFG_ICDCT_SHIFT) | + (JOBR_INTC_TIME_THLD << JRCFG_ICTT_SHIFT)); return 0; diff --git a/drivers/crypto/caam/pdb.h b/drivers/crypto/caam/pdb.h index 3a87c0cf879a..d383573d3dd4 100644 --- a/drivers/crypto/caam/pdb.h +++ b/drivers/crypto/caam/pdb.h @@ -11,8 +11,8 @@ /* * PDB- IPSec ESP Header Modification Options */ -#define PDBHMO_ESP_DECAP_SHIFT 12 -#define PDBHMO_ESP_ENCAP_SHIFT 4 +#define PDBHMO_ESP_DECAP_SHIFT 28 +#define PDBHMO_ESP_ENCAP_SHIFT 28 /* * Encap and Decap - Decrement TTL (Hop Limit) - Based on the value of the * Options Byte IP version (IPvsn) field: @@ -32,12 +32,23 @@ */ #define PDBHMO_ESP_DFBIT (0x04 << PDBHMO_ESP_ENCAP_SHIFT) +#define PDBNH_ESP_ENCAP_SHIFT 16 +#define PDBNH_ESP_ENCAP_MASK (0xff << PDBNH_ESP_ENCAP_SHIFT) + +#define PDBHDRLEN_ESP_DECAP_SHIFT 16 +#define PDBHDRLEN_MASK (0x0fff << PDBHDRLEN_ESP_DECAP_SHIFT) + +#define PDB_NH_OFFSET_SHIFT 8 +#define PDB_NH_OFFSET_MASK (0xff << PDB_NH_OFFSET_SHIFT) + /* * PDB - IPSec ESP Encap/Decap Options */ #define PDBOPTS_ESP_ARSNONE 0x00 /* no antireplay window */ #define PDBOPTS_ESP_ARS32 0x40 /* 32-entry antireplay window */ +#define PDBOPTS_ESP_ARS128 0x80 /* 128-entry antireplay window */ #define PDBOPTS_ESP_ARS64 0xc0 /* 64-entry antireplay window */ +#define PDBOPTS_ESP_ARS_MASK 0xc0 /* antireplay window mask */ #define PDBOPTS_ESP_IVSRC 0x20 /* IV comes from internal random gen */ #define PDBOPTS_ESP_ESN 0x10 /* extended sequence included */ #define PDBOPTS_ESP_OUTFMT 0x08 /* output only decapsulation (decap) */ @@ -54,35 +65,73 @@ /* * General IPSec encap/decap PDB definitions */ + +/** + * ipsec_encap_cbc - PDB part for IPsec CBC encapsulation + * @iv: 16-byte array initialization vector + */ struct ipsec_encap_cbc { - u32 iv[4]; + u8 iv[16]; }; +/** + * ipsec_encap_ctr - PDB part for IPsec CTR encapsulation + * @ctr_nonce: 4-byte array nonce + * @ctr_initial: initial count constant + * @iv: initialization vector + */ struct ipsec_encap_ctr { - u32 ctr_nonce; + u8 ctr_nonce[4]; u32 ctr_initial; - u32 iv[2]; + u64 iv; }; +/** + * ipsec_encap_ccm - PDB part for IPsec CCM encapsulation + * @salt: 3-byte array salt (lower 24 bits) + * @ccm_opt: CCM algorithm options - MSB-LSB description: + * b0_flags (8b) - CCM B0; use 0x5B for 8-byte ICV, 0x6B for 12-byte ICV, + * 0x7B for 16-byte ICV (cf. RFC4309, RFC3610) + * ctr_flags (8b) - counter flags; constant equal to 0x3 + * ctr_initial (16b) - initial count constant + * @iv: initialization vector + */ struct ipsec_encap_ccm { - u32 salt; /* lower 24 bits */ - u8 b0_flags; - u8 ctr_flags; - u16 ctr_initial; - u32 iv[2]; + u8 salt[4]; + u32 ccm_opt; + u64 iv; }; +/** + * ipsec_encap_gcm - PDB part for IPsec GCM encapsulation + * @salt: 3-byte array salt (lower 24 bits) + * @rsvd: reserved, do not use + * @iv: initialization vector + */ struct ipsec_encap_gcm { - u32 salt; /* lower 24 bits */ + u8 salt[4]; u32 rsvd1; - u32 iv[2]; + u64 iv; }; +/** + * ipsec_encap_pdb - PDB for IPsec encapsulation + * @options: MSB-LSB description + * hmo (header manipulation options) - 4b + * reserved - 4b + * next header - 8b + * next header offset - 8b + * option flags (depend on selected algorithm) - 8b + * @seq_num_ext_hi: (optional) IPsec Extended Sequence Number (ESN) + * @seq_num: IPsec sequence number + * @spi: IPsec SPI (Security Parameters Index) + * @ip_hdr_len: optional IP Header length (in bytes) + * reserved - 16b + * Opt. IP Hdr Len - 16b + * @ip_hdr: optional IP Header content + */ struct ipsec_encap_pdb { - u8 hmo_rsvd; - u8 ip_nh; - u8 ip_nh_offset; - u8 options; + u32 options; u32 seq_num_ext_hi; u32 seq_num; union { @@ -92,36 +141,65 @@ struct ipsec_encap_pdb { struct ipsec_encap_gcm gcm; }; u32 spi; - u16 rsvd1; - u16 ip_hdr_len; - u32 ip_hdr[0]; /* optional IP Header content */ + u32 ip_hdr_len; + u32 ip_hdr[0]; }; +/** + * ipsec_decap_cbc - PDB part for IPsec CBC decapsulation + * @rsvd: reserved, do not use + */ struct ipsec_decap_cbc { u32 rsvd[2]; }; +/** + * ipsec_decap_ctr - PDB part for IPsec CTR decapsulation + * @ctr_nonce: 4-byte array nonce + * @ctr_initial: initial count constant + */ struct ipsec_decap_ctr { - u32 salt; + u8 ctr_nonce[4]; u32 ctr_initial; }; +/** + * ipsec_decap_ccm - PDB part for IPsec CCM decapsulation + * @salt: 3-byte salt (lower 24 bits) + * @ccm_opt: CCM algorithm options - MSB-LSB description: + * b0_flags (8b) - CCM B0; use 0x5B for 8-byte ICV, 0x6B for 12-byte ICV, + * 0x7B for 16-byte ICV (cf. RFC4309, RFC3610) + * ctr_flags (8b) - counter flags; constant equal to 0x3 + * ctr_initial (16b) - initial count constant + */ struct ipsec_decap_ccm { - u32 salt; - u8 iv_flags; - u8 ctr_flags; - u16 ctr_initial; + u8 salt[4]; + u32 ccm_opt; }; +/** + * ipsec_decap_gcm - PDB part for IPsec GCN decapsulation + * @salt: 4-byte salt + * @rsvd: reserved, do not use + */ struct ipsec_decap_gcm { - u32 salt; + u8 salt[4]; u32 resvd; }; +/** + * ipsec_decap_pdb - PDB for IPsec decapsulation + * @options: MSB-LSB description + * hmo (header manipulation options) - 4b + * IP header length - 12b + * next header offset - 8b + * option flags (depend on selected algorithm) - 8b + * @seq_num_ext_hi: (optional) IPsec Extended Sequence Number (ESN) + * @seq_num: IPsec sequence number + * @anti_replay: Anti-replay window; size depends on ARS (option flags) + */ struct ipsec_decap_pdb { - u16 hmo_ip_hdr_len; - u8 ip_nh_offset; - u8 options; + u32 options; union { struct ipsec_decap_cbc cbc; struct ipsec_decap_ctr ctr; @@ -130,8 +208,7 @@ struct ipsec_decap_pdb { }; u32 seq_num_ext_hi; u32 seq_num; - u32 anti_replay[2]; - u32 end_index[0]; + __be32 anti_replay[4]; }; /* diff --git a/drivers/crypto/caam/regs.h b/drivers/crypto/caam/regs.h index 0ba9c40597dc..8c766cf9202c 100644 --- a/drivers/crypto/caam/regs.h +++ b/drivers/crypto/caam/regs.h @@ -8,6 +8,7 @@ #define REGS_H #include +#include #include /* @@ -65,46 +66,56 @@ * */ -#ifdef CONFIG_ARM -/* These are common macros for Power, put here for ARM */ -#define setbits32(_addr, _v) writel((readl(_addr) | (_v)), (_addr)) -#define clrbits32(_addr, _v) writel((readl(_addr) & ~(_v)), (_addr)) +extern bool caam_little_end; -#define out_arch(type, endian, a, v) __raw_write##type(cpu_to_##endian(v), a) -#define in_arch(type, endian, a) endian##_to_cpu(__raw_read##type(a)) +#define caam_to_cpu(len) \ +static inline u##len caam##len ## _to_cpu(u##len val) \ +{ \ + if (caam_little_end) \ + return le##len ## _to_cpu(val); \ + else \ + return be##len ## _to_cpu(val); \ +} -#define out_le32(a, v) out_arch(l, le32, a, v) -#define in_le32(a) in_arch(l, le32, a) +#define cpu_to_caam(len) \ +static inline u##len cpu_to_caam##len(u##len val) \ +{ \ + if (caam_little_end) \ + return cpu_to_le##len(val); \ + else \ + return cpu_to_be##len(val); \ +} -#define out_be32(a, v) out_arch(l, be32, a, v) -#define in_be32(a) in_arch(l, be32, a) +caam_to_cpu(16) +caam_to_cpu(32) +caam_to_cpu(64) +cpu_to_caam(16) +cpu_to_caam(32) +cpu_to_caam(64) -#define clrsetbits(type, addr, clear, set) \ - out_##type((addr), (in_##type(addr) & ~(clear)) | (set)) +static inline void wr_reg32(void __iomem *reg, u32 data) +{ + if (caam_little_end) + iowrite32(data, reg); + else + iowrite32be(data, reg); +} -#define clrsetbits_be32(addr, clear, set) clrsetbits(be32, addr, clear, set) -#define clrsetbits_le32(addr, clear, set) clrsetbits(le32, addr, clear, set) -#endif +static inline u32 rd_reg32(void __iomem *reg) +{ + if (caam_little_end) + return ioread32(reg); -#ifdef __BIG_ENDIAN -#define wr_reg32(reg, data) out_be32(reg, data) -#define rd_reg32(reg) in_be32(reg) -#define clrsetbits_32(addr, clear, set) clrsetbits_be32(addr, clear, set) -#ifdef CONFIG_64BIT -#define wr_reg64(reg, data) out_be64(reg, data) -#define rd_reg64(reg) in_be64(reg) -#endif -#else -#ifdef __LITTLE_ENDIAN -#define wr_reg32(reg, data) __raw_writel(data, reg) -#define rd_reg32(reg) __raw_readl(reg) -#define clrsetbits_32(addr, clear, set) clrsetbits_le32(addr, clear, set) -#ifdef CONFIG_64BIT -#define wr_reg64(reg, data) __raw_writeq(data, reg) -#define rd_reg64(reg) __raw_readq(reg) -#endif -#endif -#endif + return ioread32be(reg); +} + +static inline void clrsetbits_32(void __iomem *reg, u32 clear, u32 set) +{ + if (caam_little_end) + iowrite32((ioread32(reg) & ~clear) | set, reg); + else + iowrite32be((ioread32be(reg) & ~clear) | set, reg); +} /* * The only users of these wr/rd_reg64 functions is the Job Ring (JR). @@ -123,29 +134,67 @@ * base + 0x0000 : least-significant 32 bits * base + 0x0004 : most-significant 32 bits */ +#ifdef CONFIG_64BIT +static inline void wr_reg64(void __iomem *reg, u64 data) +{ + if (caam_little_end) + iowrite64(data, reg); + else + iowrite64be(data, reg); +} -#ifndef CONFIG_64BIT -#if !defined(CONFIG_CRYPTO_DEV_FSL_CAAM_LE) || \ - defined(CONFIG_CRYPTO_DEV_FSL_CAAM_IMX) -#define REG64_MS32(reg) ((u32 __iomem *)(reg)) -#define REG64_LS32(reg) ((u32 __iomem *)(reg) + 1) -#else -#define REG64_MS32(reg) ((u32 __iomem *)(reg) + 1) -#define REG64_LS32(reg) ((u32 __iomem *)(reg)) -#endif - -static inline void wr_reg64(u64 __iomem *reg, u64 data) +static inline u64 rd_reg64(void __iomem *reg) { - wr_reg32(REG64_MS32(reg), data >> 32); - wr_reg32(REG64_LS32(reg), data); + if (caam_little_end) + return ioread64(reg); + else + return ioread64be(reg); } -static inline u64 rd_reg64(u64 __iomem *reg) +#else /* CONFIG_64BIT */ +static inline void wr_reg64(void __iomem *reg, u64 data) { - return ((u64)rd_reg32(REG64_MS32(reg)) << 32 | - (u64)rd_reg32(REG64_LS32(reg))); +#ifndef CONFIG_CRYPTO_DEV_FSL_CAAM_IMX + if (caam_little_end) { + wr_reg32((u32 __iomem *)(reg) + 1, data >> 32); + wr_reg32((u32 __iomem *)(reg), data); + } else +#endif + { + wr_reg32((u32 __iomem *)(reg), data >> 32); + wr_reg32((u32 __iomem *)(reg) + 1, data); + } } + +static inline u64 rd_reg64(void __iomem *reg) +{ +#ifndef CONFIG_CRYPTO_DEV_FSL_CAAM_IMX + if (caam_little_end) + return ((u64)rd_reg32((u32 __iomem *)(reg) + 1) << 32 | + (u64)rd_reg32((u32 __iomem *)(reg))); + else #endif + return ((u64)rd_reg32((u32 __iomem *)(reg)) << 32 | + (u64)rd_reg32((u32 __iomem *)(reg) + 1)); +} +#endif /* CONFIG_64BIT */ + +#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT +#ifdef CONFIG_SOC_IMX7D +#define cpu_to_caam_dma(value) \ + (((u64)cpu_to_caam32(lower_32_bits(value)) << 32) | \ + (u64)cpu_to_caam32(higher_32_bits(value))) +#define caam_dma_to_cpu(value) \ + (((u64)caam32_to_cpu(lower_32_bits(value)) << 32) | \ + (u64)caam32_to_cpu(higher_32_bits(value))) +#else +#define cpu_to_caam_dma(value) cpu_to_caam64(value) +#define caam_dma_to_cpu(value) caam64_to_cpu(value) +#endif /* CONFIG_SOC_IMX7D */ +#else +#define cpu_to_caam_dma(value) cpu_to_caam32(value) +#define caam_dma_to_cpu(value) caam32_to_cpu(value) +#endif /* CONFIG_ARCH_DMA_ADDR_T_64BIT */ /* * jr_outentry @@ -249,6 +298,8 @@ struct caam_perfmon { u32 faultliodn; /* FALR - Fault Address LIODN */ u32 faultdetail; /* FADR - Fault Addr Detail */ u32 rsvd2; +#define CSTA_PLEND BIT(10) +#define CSTA_ALT_PLEND BIT(18) u32 status; /* CSTA - CAAM Status */ u64 rsvd3; diff --git a/drivers/crypto/caam/sg_sw_sec4.h b/drivers/crypto/caam/sg_sw_sec4.h index 2311341b7356..19dc64fede0d 100644 --- a/drivers/crypto/caam/sg_sw_sec4.h +++ b/drivers/crypto/caam/sg_sw_sec4.h @@ -5,6 +5,8 @@ * */ +#include "regs.h" + struct sec4_sg_entry; /* @@ -13,10 +15,9 @@ struct sec4_sg_entry; static inline void dma_to_sec4_sg_one(struct sec4_sg_entry *sec4_sg_ptr, dma_addr_t dma, u32 len, u16 offset) { - sec4_sg_ptr->ptr = dma; - sec4_sg_ptr->len = len; - sec4_sg_ptr->buf_pool_id = 0; - sec4_sg_ptr->offset = offset & SEC4_SG_OFFSET_MASK; + sec4_sg_ptr->ptr = cpu_to_caam_dma(dma); + sec4_sg_ptr->len = cpu_to_caam32(len); + sec4_sg_ptr->bpid_offset = cpu_to_caam32(offset & SEC4_SG_OFFSET_MASK); #ifdef DEBUG print_hex_dump(KERN_ERR, "sec4_sg_ptr@: ", DUMP_PREFIX_ADDRESS, 16, 4, sec4_sg_ptr, @@ -51,7 +52,7 @@ static inline void sg_to_sec4_sg_last(struct scatterlist *sg, int sg_count, u16 offset) { sec4_sg_ptr = sg_to_sec4_sg(sg, sg_count, sec4_sg_ptr, offset); - sec4_sg_ptr->len |= SEC4_SG_LEN_FIN; + sec4_sg_ptr->len |= cpu_to_caam32(SEC4_SG_LEN_FIN); } static inline struct sec4_sg_entry *sg_to_sec4_sg_len( -- cgit v1.2.3 From 9e217795e08a0b0f06afd121401180d756eb1b54 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Horia=20Geant=C4=83?= Date: Thu, 19 May 2016 18:11:33 +0300 Subject: crypto: caam - add ARCH_LAYERSCAPE to supported architectures MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This basically adds support for ls1043a platform. Signed-off-by: Horia Geantă Signed-off-by: Herbert Xu --- drivers/crypto/caam/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/crypto') diff --git a/drivers/crypto/caam/Kconfig b/drivers/crypto/caam/Kconfig index d2c2909a4020..ff54c42e6e51 100644 --- a/drivers/crypto/caam/Kconfig +++ b/drivers/crypto/caam/Kconfig @@ -1,6 +1,6 @@ config CRYPTO_DEV_FSL_CAAM tristate "Freescale CAAM-Multicore driver backend" - depends on FSL_SOC || ARCH_MXC + depends on FSL_SOC || ARCH_MXC || ARCH_LAYERSCAPE help Enables the driver module for Freescale's Cryptographic Accelerator and Assurance Module (CAAM), also known as the SEC version 4 (SEC4). -- cgit v1.2.3 From 5318c53d5b4bbf097ccba2e74831d67e4d63b1a1 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Fri, 27 May 2016 13:49:40 +0200 Subject: crypto: s5p-sss - Use consistent indentation for variables and members Bring some consistency by: 1. Replacing fixed-space indentation of structure members with just tabs. 2. Remove indentation in declaration of local variable between type and name. Driver was mixing usage of such indentation and lack of it. When removing indentation, reorder variables in reversed-christmas-tree order with first variables being initialized ones. Signed-off-by: Krzysztof Kozlowski Acked-by: Vladimir Zapolskiy Signed-off-by: Herbert Xu --- drivers/crypto/s5p-sss.c | 80 ++++++++++++++++++++++++------------------------ 1 file changed, 40 insertions(+), 40 deletions(-) (limited to 'drivers/crypto') diff --git a/drivers/crypto/s5p-sss.c b/drivers/crypto/s5p-sss.c index 2b3a0cfe3331..dce1af0ce85c 100644 --- a/drivers/crypto/s5p-sss.c +++ b/drivers/crypto/s5p-sss.c @@ -155,43 +155,43 @@ * expansion of its usage. */ struct samsung_aes_variant { - unsigned int aes_offset; + unsigned int aes_offset; }; struct s5p_aes_reqctx { - unsigned long mode; + unsigned long mode; }; struct s5p_aes_ctx { - struct s5p_aes_dev *dev; + struct s5p_aes_dev *dev; - uint8_t aes_key[AES_MAX_KEY_SIZE]; - uint8_t nonce[CTR_RFC3686_NONCE_SIZE]; - int keylen; + uint8_t aes_key[AES_MAX_KEY_SIZE]; + uint8_t nonce[CTR_RFC3686_NONCE_SIZE]; + int keylen; }; struct s5p_aes_dev { - struct device *dev; - struct clk *clk; - void __iomem *ioaddr; - void __iomem *aes_ioaddr; - int irq_fc; + struct device *dev; + struct clk *clk; + void __iomem *ioaddr; + void __iomem *aes_ioaddr; + int irq_fc; - struct ablkcipher_request *req; - struct s5p_aes_ctx *ctx; - struct scatterlist *sg_src; - struct scatterlist *sg_dst; + struct ablkcipher_request *req; + struct s5p_aes_ctx *ctx; + struct scatterlist *sg_src; + struct scatterlist *sg_dst; /* In case of unaligned access: */ - struct scatterlist *sg_src_cpy; - struct scatterlist *sg_dst_cpy; + struct scatterlist *sg_src_cpy; + struct scatterlist *sg_dst_cpy; - struct tasklet_struct tasklet; - struct crypto_queue queue; - bool busy; - spinlock_t lock; + struct tasklet_struct tasklet; + struct crypto_queue queue; + bool busy; + spinlock_t lock; - struct samsung_aes_variant *variant; + struct samsung_aes_variant *variant; }; static struct s5p_aes_dev *s5p_dev; @@ -421,11 +421,11 @@ static bool s5p_aes_rx(struct s5p_aes_dev *dev) static irqreturn_t s5p_aes_interrupt(int irq, void *dev_id) { struct platform_device *pdev = dev_id; - struct s5p_aes_dev *dev = platform_get_drvdata(pdev); - uint32_t status; - unsigned long flags; - bool set_dma_tx = false; - bool set_dma_rx = false; + struct s5p_aes_dev *dev = platform_get_drvdata(pdev); + bool set_dma_tx = false; + bool set_dma_rx = false; + unsigned long flags; + uint32_t status; spin_lock_irqsave(&dev->lock, flags); @@ -538,10 +538,10 @@ static int s5p_set_outdata_start(struct s5p_aes_dev *dev, static void s5p_aes_crypt_start(struct s5p_aes_dev *dev, unsigned long mode) { - struct ablkcipher_request *req = dev->req; - uint32_t aes_control; - int err; - unsigned long flags; + struct ablkcipher_request *req = dev->req; + uint32_t aes_control; + unsigned long flags; + int err; aes_control = SSS_AES_KEY_CHANGE_MODE; if (mode & FLAGS_AES_DECRYPT) @@ -653,10 +653,10 @@ exit: static int s5p_aes_crypt(struct ablkcipher_request *req, unsigned long mode) { - struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); - struct s5p_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm); - struct s5p_aes_reqctx *reqctx = ablkcipher_request_ctx(req); - struct s5p_aes_dev *dev = ctx->dev; + struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); + struct s5p_aes_reqctx *reqctx = ablkcipher_request_ctx(req); + struct s5p_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm); + struct s5p_aes_dev *dev = ctx->dev; if (!IS_ALIGNED(req->nbytes, AES_BLOCK_SIZE)) { dev_err(dev->dev, "request size is not exact amount of AES blocks\n"); @@ -671,7 +671,7 @@ static int s5p_aes_crypt(struct ablkcipher_request *req, unsigned long mode) static int s5p_aes_setkey(struct crypto_ablkcipher *cipher, const uint8_t *key, unsigned int keylen) { - struct crypto_tfm *tfm = crypto_ablkcipher_tfm(cipher); + struct crypto_tfm *tfm = crypto_ablkcipher_tfm(cipher); struct s5p_aes_ctx *ctx = crypto_tfm_ctx(tfm); if (keylen != AES_KEYSIZE_128 && @@ -763,11 +763,11 @@ static struct crypto_alg algs[] = { static int s5p_aes_probe(struct platform_device *pdev) { - int i, j, err = -ENODEV; - struct s5p_aes_dev *pdata; - struct device *dev = &pdev->dev; - struct resource *res; + struct device *dev = &pdev->dev; + int i, j, err = -ENODEV; struct samsung_aes_variant *variant; + struct s5p_aes_dev *pdata; + struct resource *res; if (s5p_dev) return -EEXIST; -- cgit v1.2.3 From 3741bbb207f7b9e92ad3b878a30ccd1ddcdb8ac8 Mon Sep 17 00:00:00 2001 From: Lokesh Vutla Date: Wed, 1 Jun 2016 11:56:02 +0300 Subject: crypto: omap-aes - Fix registration of algorithms Algorithms can be registered only once. So skip registration of algorithms if already registered (i.e. in case we have two AES cores in the system.) Signed-off-by: Lokesh Vutla Signed-off-by: Tero Kristo Signed-off-by: Herbert Xu --- drivers/crypto/omap-aes.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'drivers/crypto') diff --git a/drivers/crypto/omap-aes.c b/drivers/crypto/omap-aes.c index ce174d3b842c..4a0e6a545ba2 100644 --- a/drivers/crypto/omap-aes.c +++ b/drivers/crypto/omap-aes.c @@ -1185,17 +1185,19 @@ static int omap_aes_probe(struct platform_device *pdev) spin_unlock(&list_lock); for (i = 0; i < dd->pdata->algs_info_size; i++) { - for (j = 0; j < dd->pdata->algs_info[i].size; j++) { - algp = &dd->pdata->algs_info[i].algs_list[j]; + if (!dd->pdata->algs_info[i].registered) { + for (j = 0; j < dd->pdata->algs_info[i].size; j++) { + algp = &dd->pdata->algs_info[i].algs_list[j]; - pr_debug("reg alg: %s\n", algp->cra_name); - INIT_LIST_HEAD(&algp->cra_list); + pr_debug("reg alg: %s\n", algp->cra_name); + INIT_LIST_HEAD(&algp->cra_list); - err = crypto_register_alg(algp); - if (err) - goto err_algs; + err = crypto_register_alg(algp); + if (err) + goto err_algs; - dd->pdata->algs_info[i].registered++; + dd->pdata->algs_info[i].registered++; + } } } -- cgit v1.2.3 From b096b544d6c3cb65c53551ddedd21180f9087ab9 Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Mon, 6 Jun 2016 13:20:34 +0200 Subject: crypto: talitos - using helpers for all talitos_ptr operations Use helper for all modifications to talitos_ptr in preparation to the implementation of AEAD for SEC1 to_talitos_ptr_extent_clear() has been removed in favor of to_talitos_ptr_ext_set() to set any value and to_talitos_ptr_ext_or() to or the extent field with a value name has been shorten to help keeping single lines of 80 chars Signed-off-by: Christophe Leroy Signed-off-by: Herbert Xu --- drivers/crypto/talitos.c | 59 ++++++++++++++++++++++++++++-------------------- 1 file changed, 34 insertions(+), 25 deletions(-) (limited to 'drivers/crypto') diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index b7ee8d30147d..a92aa37f32a0 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -91,10 +91,17 @@ static unsigned short from_talitos_ptr_len(struct talitos_ptr *ptr, return be16_to_cpu(ptr->len); } -static void to_talitos_ptr_extent_clear(struct talitos_ptr *ptr, bool is_sec1) +static void to_talitos_ptr_ext_set(struct talitos_ptr *ptr, u8 val, + bool is_sec1) { if (!is_sec1) - ptr->j_extent = 0; + ptr->j_extent = val; +} + +static void to_talitos_ptr_ext_or(struct talitos_ptr *ptr, u8 val, bool is_sec1) +{ + if (!is_sec1) + ptr->j_extent |= val; } /* @@ -111,7 +118,7 @@ static void map_single_talitos_ptr(struct device *dev, to_talitos_ptr_len(ptr, len, is_sec1); to_talitos_ptr(ptr, dma_addr, is_sec1); - to_talitos_ptr_extent_clear(ptr, is_sec1); + to_talitos_ptr_ext_set(ptr, 0, is_sec1); } /* @@ -1050,8 +1057,8 @@ static int sg_to_link_tbl_offset(struct scatterlist *sg, int sg_count, to_talitos_ptr(link_tbl_ptr + count, sg_dma_address(sg) + offset, 0); - link_tbl_ptr[count].len = cpu_to_be16(len); - link_tbl_ptr[count].j_extent = 0; + to_talitos_ptr_len(link_tbl_ptr + count, len, 0); + to_talitos_ptr_ext_set(link_tbl_ptr + count, 0, 0); count++; cryptlen -= len; offset = 0; @@ -1062,7 +1069,8 @@ next: /* tag end of link table */ if (count > 0) - link_tbl_ptr[count - 1].j_extent = DESC_PTR_LNKTBL_RETURN; + to_talitos_ptr_ext_set(link_tbl_ptr + count - 1, + DESC_PTR_LNKTBL_RETURN, 0); return count; } @@ -1102,14 +1110,14 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, (areq->src == areq->dst) ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE); /* hmac data */ - desc->ptr[1].len = cpu_to_be16(areq->assoclen); + to_talitos_ptr_len(&desc->ptr[1], areq->assoclen, 0); if (sg_count > 1 && (ret = sg_to_link_tbl_offset(areq->src, sg_count, 0, areq->assoclen, &edesc->link_tbl[tbl_off])) > 1) { to_talitos_ptr(&desc->ptr[1], edesc->dma_link_tbl + tbl_off * sizeof(struct talitos_ptr), 0); - desc->ptr[1].j_extent = DESC_PTR_LNKTBL_JUMP; + to_talitos_ptr_ext_set(&desc->ptr[1], DESC_PTR_LNKTBL_JUMP, 0); dma_sync_single_for_device(dev, edesc->dma_link_tbl, edesc->dma_len, DMA_BIDIRECTIONAL); @@ -1117,13 +1125,13 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, tbl_off += ret; } else { to_talitos_ptr(&desc->ptr[1], sg_dma_address(areq->src), 0); - desc->ptr[1].j_extent = 0; + to_talitos_ptr_ext_set(&desc->ptr[1], 0, 0); } /* cipher iv */ to_talitos_ptr(&desc->ptr[2], edesc->iv_dma, 0); - desc->ptr[2].len = cpu_to_be16(ivsize); - desc->ptr[2].j_extent = 0; + to_talitos_ptr_len(&desc->ptr[2], ivsize, 0); + to_talitos_ptr_ext_set(&desc->ptr[2], 0, 0); /* cipher key */ map_single_talitos_ptr(dev, &desc->ptr[3], ctx->enckeylen, @@ -1136,8 +1144,8 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, * extent is bytes of HMAC postpended to ciphertext, * typically 12 for ipsec */ - desc->ptr[4].len = cpu_to_be16(cryptlen); - desc->ptr[4].j_extent = authsize; + to_talitos_ptr_len(&desc->ptr[4], cryptlen, 0); + to_talitos_ptr_ext_set(&desc->ptr[4], authsize, 0); sg_link_tbl_len = cryptlen; if (edesc->desc.hdr & DESC_HDR_MODE1_MDEU_CICV) @@ -1150,7 +1158,7 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, areq->assoclen, sg_link_tbl_len, &edesc->link_tbl[tbl_off])) > 1) { - desc->ptr[4].j_extent |= DESC_PTR_LNKTBL_JUMP; + to_talitos_ptr_ext_or(&desc->ptr[4], DESC_PTR_LNKTBL_JUMP, 0); to_talitos_ptr(&desc->ptr[4], edesc->dma_link_tbl + tbl_off * sizeof(struct talitos_ptr), 0); @@ -1163,8 +1171,8 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, } /* cipher out */ - desc->ptr[5].len = cpu_to_be16(cryptlen); - desc->ptr[5].j_extent = authsize; + to_talitos_ptr_len(&desc->ptr[5], cryptlen, 0); + to_talitos_ptr_ext_set(&desc->ptr[5], authsize, 0); if (areq->src != areq->dst) sg_count = dma_map_sg(dev, areq->dst, edesc->dst_nents ? : 1, @@ -1186,17 +1194,17 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, /* Add an entry to the link table for ICV data */ tbl_ptr += sg_count - 1; - tbl_ptr->j_extent = 0; + to_talitos_ptr_ext_set(tbl_ptr, 0, 0); tbl_ptr++; - tbl_ptr->j_extent = DESC_PTR_LNKTBL_RETURN; - tbl_ptr->len = cpu_to_be16(authsize); + to_talitos_ptr_ext_set(tbl_ptr, DESC_PTR_LNKTBL_RETURN, 0); + to_talitos_ptr_len(tbl_ptr, authsize, 0); /* icv data follows link tables */ to_talitos_ptr(tbl_ptr, edesc->dma_link_tbl + (edesc->src_nents + edesc->dst_nents + 2) * sizeof(struct talitos_ptr) + authsize, 0); - desc->ptr[5].j_extent |= DESC_PTR_LNKTBL_JUMP; + to_talitos_ptr_ext_or(&desc->ptr[5], DESC_PTR_LNKTBL_JUMP, 0); dma_sync_single_for_device(ctx->dev, edesc->dma_link_tbl, edesc->dma_len, DMA_BIDIRECTIONAL); @@ -1493,7 +1501,7 @@ int map_sg_in_talitos_ptr(struct device *dev, struct scatterlist *src, len, DMA_TO_DEVICE); } } else { - to_talitos_ptr_extent_clear(ptr, is_sec1); + to_talitos_ptr_ext_set(ptr, 0, is_sec1); sg_count = dma_map_sg(dev, src, edesc->src_nents ? : 1, dir); @@ -1504,7 +1512,8 @@ int map_sg_in_talitos_ptr(struct device *dev, struct scatterlist *src, &edesc->link_tbl[0]); if (sg_count > 1) { to_talitos_ptr(ptr, edesc->dma_link_tbl, 0); - ptr->j_extent |= DESC_PTR_LNKTBL_JUMP; + to_talitos_ptr_ext_or(ptr, DESC_PTR_LNKTBL_JUMP, + 0); dma_sync_single_for_device(dev, edesc->dma_link_tbl, edesc->dma_len, @@ -1544,7 +1553,7 @@ void map_sg_out_talitos_ptr(struct device *dev, struct scatterlist *dst, len, DMA_FROM_DEVICE); } } else { - to_talitos_ptr_extent_clear(ptr, is_sec1); + to_talitos_ptr_ext_set(ptr, 0, is_sec1); if (sg_count == 1) { to_talitos_ptr(ptr, sg_dma_address(dst), is_sec1); @@ -1555,7 +1564,7 @@ void map_sg_out_talitos_ptr(struct device *dev, struct scatterlist *dst, to_talitos_ptr(ptr, edesc->dma_link_tbl + (edesc->src_nents + 1) * sizeof(struct talitos_ptr), 0); - ptr->j_extent |= DESC_PTR_LNKTBL_JUMP; + to_talitos_ptr_ext_or(ptr, DESC_PTR_LNKTBL_JUMP, 0); sg_to_link_tbl(dst, sg_count, len, link_tbl_ptr); dma_sync_single_for_device(dev, edesc->dma_link_tbl, edesc->dma_len, @@ -1586,7 +1595,7 @@ static int common_nonsnoop(struct talitos_edesc *edesc, /* cipher iv */ to_talitos_ptr(&desc->ptr[1], edesc->iv_dma, is_sec1); to_talitos_ptr_len(&desc->ptr[1], ivsize, is_sec1); - to_talitos_ptr_extent_clear(&desc->ptr[1], is_sec1); + to_talitos_ptr_ext_set(&desc->ptr[1], 0, is_sec1); /* cipher key */ map_single_talitos_ptr(dev, &desc->ptr[2], ctx->keylen, -- cgit v1.2.3 From 246a87cda070a708361cee4309748bcb2cb6121d Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Mon, 6 Jun 2016 13:20:36 +0200 Subject: crypto: talitos - move mapping helpers before IPSEC functions In order to be able to use the mapping/unmapping helpers for IPSEC it needs to be move upper in the file Signed-off-by: Christophe Leroy Signed-off-by: Herbert Xu --- drivers/crypto/talitos.c | 244 +++++++++++++++++++++++------------------------ 1 file changed, 122 insertions(+), 122 deletions(-) (limited to 'drivers/crypto') diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index a92aa37f32a0..beb369e557b9 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -926,6 +926,33 @@ static void talitos_sg_unmap(struct device *dev, dma_unmap_sg(dev, src, src_nents, DMA_BIDIRECTIONAL); } +static void unmap_sg_talitos_ptr(struct device *dev, struct scatterlist *src, + struct scatterlist *dst, unsigned int len, + struct talitos_edesc *edesc) +{ + struct talitos_private *priv = dev_get_drvdata(dev); + bool is_sec1 = has_ftr_sec1(priv); + + if (is_sec1) { + if (!edesc->src_nents) { + dma_unmap_sg(dev, src, 1, + dst != src ? DMA_TO_DEVICE + : DMA_BIDIRECTIONAL); + } + if (dst && edesc->dst_nents) { + dma_sync_single_for_device(dev, + edesc->dma_link_tbl + len, + len, DMA_FROM_DEVICE); + sg_copy_from_buffer(dst, edesc->dst_nents ? : 1, + edesc->buf + len, len); + } else if (dst && dst != src) { + dma_unmap_sg(dev, dst, 1, DMA_FROM_DEVICE); + } + } else { + talitos_sg_unmap(dev, edesc, src, dst); + } +} + static void ipsec_esp_unmap(struct device *dev, struct talitos_edesc *edesc, struct aead_request *areq) @@ -1083,6 +1110,101 @@ static inline int sg_to_link_tbl(struct scatterlist *sg, int sg_count, link_tbl_ptr); } +int map_sg_in_talitos_ptr(struct device *dev, struct scatterlist *src, + unsigned int len, struct talitos_edesc *edesc, + enum dma_data_direction dir, struct talitos_ptr *ptr) +{ + int sg_count; + struct talitos_private *priv = dev_get_drvdata(dev); + bool is_sec1 = has_ftr_sec1(priv); + + to_talitos_ptr_len(ptr, len, is_sec1); + + if (is_sec1) { + sg_count = edesc->src_nents ? : 1; + + if (sg_count == 1) { + dma_map_sg(dev, src, 1, dir); + to_talitos_ptr(ptr, sg_dma_address(src), is_sec1); + } else { + sg_copy_to_buffer(src, sg_count, edesc->buf, len); + to_talitos_ptr(ptr, edesc->dma_link_tbl, is_sec1); + dma_sync_single_for_device(dev, edesc->dma_link_tbl, + len, DMA_TO_DEVICE); + } + } else { + to_talitos_ptr_ext_set(ptr, 0, is_sec1); + + sg_count = dma_map_sg(dev, src, edesc->src_nents ? : 1, dir); + + if (sg_count == 1) { + to_talitos_ptr(ptr, sg_dma_address(src), is_sec1); + } else { + sg_count = sg_to_link_tbl(src, sg_count, len, + &edesc->link_tbl[0]); + if (sg_count > 1) { + to_talitos_ptr(ptr, edesc->dma_link_tbl, 0); + to_talitos_ptr_ext_or(ptr, DESC_PTR_LNKTBL_JUMP, + 0); + dma_sync_single_for_device(dev, + edesc->dma_link_tbl, + edesc->dma_len, + DMA_BIDIRECTIONAL); + } else { + /* Only one segment now, so no link tbl needed*/ + to_talitos_ptr(ptr, sg_dma_address(src), + is_sec1); + } + } + } + return sg_count; +} + +void map_sg_out_talitos_ptr(struct device *dev, struct scatterlist *dst, + unsigned int len, struct talitos_edesc *edesc, + enum dma_data_direction dir, + struct talitos_ptr *ptr, int sg_count) +{ + struct talitos_private *priv = dev_get_drvdata(dev); + bool is_sec1 = has_ftr_sec1(priv); + + if (dir != DMA_NONE) + sg_count = dma_map_sg(dev, dst, edesc->dst_nents ? : 1, dir); + + to_talitos_ptr_len(ptr, len, is_sec1); + + if (is_sec1) { + if (sg_count == 1) { + if (dir != DMA_NONE) + dma_map_sg(dev, dst, 1, dir); + to_talitos_ptr(ptr, sg_dma_address(dst), is_sec1); + } else { + to_talitos_ptr(ptr, edesc->dma_link_tbl + len, is_sec1); + dma_sync_single_for_device(dev, + edesc->dma_link_tbl + len, + len, DMA_FROM_DEVICE); + } + } else { + to_talitos_ptr_ext_set(ptr, 0, is_sec1); + + if (sg_count == 1) { + to_talitos_ptr(ptr, sg_dma_address(dst), is_sec1); + } else { + struct talitos_ptr *link_tbl_ptr = + &edesc->link_tbl[edesc->src_nents + 1]; + + to_talitos_ptr(ptr, edesc->dma_link_tbl + + (edesc->src_nents + 1) * + sizeof(struct talitos_ptr), 0); + to_talitos_ptr_ext_or(ptr, DESC_PTR_LNKTBL_JUMP, 0); + sg_to_link_tbl(dst, sg_count, len, link_tbl_ptr); + dma_sync_single_for_device(dev, edesc->dma_link_tbl, + edesc->dma_len, + DMA_BIDIRECTIONAL); + } + } +} + /* * fill in and submit ipsec_esp descriptor */ @@ -1420,33 +1542,6 @@ static int ablkcipher_setkey(struct crypto_ablkcipher *cipher, return 0; } -static void unmap_sg_talitos_ptr(struct device *dev, struct scatterlist *src, - struct scatterlist *dst, unsigned int len, - struct talitos_edesc *edesc) -{ - struct talitos_private *priv = dev_get_drvdata(dev); - bool is_sec1 = has_ftr_sec1(priv); - - if (is_sec1) { - if (!edesc->src_nents) { - dma_unmap_sg(dev, src, 1, - dst != src ? DMA_TO_DEVICE - : DMA_BIDIRECTIONAL); - } - if (dst && edesc->dst_nents) { - dma_sync_single_for_device(dev, - edesc->dma_link_tbl + len, - len, DMA_FROM_DEVICE); - sg_copy_from_buffer(dst, edesc->dst_nents ? : 1, - edesc->buf + len, len); - } else if (dst && dst != src) { - dma_unmap_sg(dev, dst, 1, DMA_FROM_DEVICE); - } - } else { - talitos_sg_unmap(dev, edesc, src, dst); - } -} - static void common_nonsnoop_unmap(struct device *dev, struct talitos_edesc *edesc, struct ablkcipher_request *areq) @@ -1478,101 +1573,6 @@ static void ablkcipher_done(struct device *dev, areq->base.complete(&areq->base, err); } -int map_sg_in_talitos_ptr(struct device *dev, struct scatterlist *src, - unsigned int len, struct talitos_edesc *edesc, - enum dma_data_direction dir, struct talitos_ptr *ptr) -{ - int sg_count; - struct talitos_private *priv = dev_get_drvdata(dev); - bool is_sec1 = has_ftr_sec1(priv); - - to_talitos_ptr_len(ptr, len, is_sec1); - - if (is_sec1) { - sg_count = edesc->src_nents ? : 1; - - if (sg_count == 1) { - dma_map_sg(dev, src, 1, dir); - to_talitos_ptr(ptr, sg_dma_address(src), is_sec1); - } else { - sg_copy_to_buffer(src, sg_count, edesc->buf, len); - to_talitos_ptr(ptr, edesc->dma_link_tbl, is_sec1); - dma_sync_single_for_device(dev, edesc->dma_link_tbl, - len, DMA_TO_DEVICE); - } - } else { - to_talitos_ptr_ext_set(ptr, 0, is_sec1); - - sg_count = dma_map_sg(dev, src, edesc->src_nents ? : 1, dir); - - if (sg_count == 1) { - to_talitos_ptr(ptr, sg_dma_address(src), is_sec1); - } else { - sg_count = sg_to_link_tbl(src, sg_count, len, - &edesc->link_tbl[0]); - if (sg_count > 1) { - to_talitos_ptr(ptr, edesc->dma_link_tbl, 0); - to_talitos_ptr_ext_or(ptr, DESC_PTR_LNKTBL_JUMP, - 0); - dma_sync_single_for_device(dev, - edesc->dma_link_tbl, - edesc->dma_len, - DMA_BIDIRECTIONAL); - } else { - /* Only one segment now, so no link tbl needed*/ - to_talitos_ptr(ptr, sg_dma_address(src), - is_sec1); - } - } - } - return sg_count; -} - -void map_sg_out_talitos_ptr(struct device *dev, struct scatterlist *dst, - unsigned int len, struct talitos_edesc *edesc, - enum dma_data_direction dir, - struct talitos_ptr *ptr, int sg_count) -{ - struct talitos_private *priv = dev_get_drvdata(dev); - bool is_sec1 = has_ftr_sec1(priv); - - if (dir != DMA_NONE) - sg_count = dma_map_sg(dev, dst, edesc->dst_nents ? : 1, dir); - - to_talitos_ptr_len(ptr, len, is_sec1); - - if (is_sec1) { - if (sg_count == 1) { - if (dir != DMA_NONE) - dma_map_sg(dev, dst, 1, dir); - to_talitos_ptr(ptr, sg_dma_address(dst), is_sec1); - } else { - to_talitos_ptr(ptr, edesc->dma_link_tbl + len, is_sec1); - dma_sync_single_for_device(dev, - edesc->dma_link_tbl + len, - len, DMA_FROM_DEVICE); - } - } else { - to_talitos_ptr_ext_set(ptr, 0, is_sec1); - - if (sg_count == 1) { - to_talitos_ptr(ptr, sg_dma_address(dst), is_sec1); - } else { - struct talitos_ptr *link_tbl_ptr = - &edesc->link_tbl[edesc->src_nents + 1]; - - to_talitos_ptr(ptr, edesc->dma_link_tbl + - (edesc->src_nents + 1) * - sizeof(struct talitos_ptr), 0); - to_talitos_ptr_ext_or(ptr, DESC_PTR_LNKTBL_JUMP, 0); - sg_to_link_tbl(dst, sg_count, len, link_tbl_ptr); - dma_sync_single_for_device(dev, edesc->dma_link_tbl, - edesc->dma_len, - DMA_BIDIRECTIONAL); - } - } -} - static int common_nonsnoop(struct talitos_edesc *edesc, struct ablkcipher_request *areq, void (*callback) (struct device *dev, -- cgit v1.2.3 From 6a1e8d14156d4afb6a835d97170d181ea6b5238c Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Mon, 6 Jun 2016 13:20:38 +0200 Subject: crypto: talitos - making mapping helpers more generic In preparation of IPSEC for SEC1, first step is to make the mapping helpers more generic so that they can also be used by AEAD functions. First, the functions are moved before IPSEC functions in talitos.c talitos_sg_unmap() and unmap_sg_talitos_ptr() are merged as they are quite similar, the second one handling the SEC1 case an calling the first one for SEC2 map_sg_in_talitos_ptr() and map_sg_out_talitos_ptr() are merged into talitos_sg_map() and enhenced to support offseted zones as used for AEAD. The actual mapping is now performed outside that helper. The DMA sync is also done outside to not make it several times. talitos_edesc_alloc() size calculation are fixed to also take into account AEAD specific parts also for SEC1 Signed-off-by: Christophe Leroy Signed-off-by: Herbert Xu --- drivers/crypto/talitos.c | 230 +++++++++++++++++++---------------------------- 1 file changed, 93 insertions(+), 137 deletions(-) (limited to 'drivers/crypto') diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index beb369e557b9..b0d3c24f7f79 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -911,45 +911,28 @@ struct talitos_edesc { static void talitos_sg_unmap(struct device *dev, struct talitos_edesc *edesc, struct scatterlist *src, - struct scatterlist *dst) + struct scatterlist *dst, + unsigned int len, unsigned int offset) { + struct talitos_private *priv = dev_get_drvdata(dev); + bool is_sec1 = has_ftr_sec1(priv); unsigned int src_nents = edesc->src_nents ? : 1; unsigned int dst_nents = edesc->dst_nents ? : 1; + if (is_sec1 && dst && dst_nents > 1) { + dma_sync_single_for_device(dev, edesc->dma_link_tbl + offset, + len, DMA_FROM_DEVICE); + sg_pcopy_from_buffer(dst, dst_nents, edesc->buf + offset, len, + offset); + } if (src != dst) { - dma_unmap_sg(dev, src, src_nents, DMA_TO_DEVICE); + if (src_nents == 1 || !is_sec1) + dma_unmap_sg(dev, src, src_nents, DMA_TO_DEVICE); - if (dst) { + if (dst && (dst_nents == 1 || !is_sec1)) dma_unmap_sg(dev, dst, dst_nents, DMA_FROM_DEVICE); - } - } else + } else if (src_nents == 1 || !is_sec1) { dma_unmap_sg(dev, src, src_nents, DMA_BIDIRECTIONAL); -} - -static void unmap_sg_talitos_ptr(struct device *dev, struct scatterlist *src, - struct scatterlist *dst, unsigned int len, - struct talitos_edesc *edesc) -{ - struct talitos_private *priv = dev_get_drvdata(dev); - bool is_sec1 = has_ftr_sec1(priv); - - if (is_sec1) { - if (!edesc->src_nents) { - dma_unmap_sg(dev, src, 1, - dst != src ? DMA_TO_DEVICE - : DMA_BIDIRECTIONAL); - } - if (dst && edesc->dst_nents) { - dma_sync_single_for_device(dev, - edesc->dma_link_tbl + len, - len, DMA_FROM_DEVICE); - sg_copy_from_buffer(dst, edesc->dst_nents ? : 1, - edesc->buf + len, len); - } else if (dst && dst != src) { - dma_unmap_sg(dev, dst, 1, DMA_FROM_DEVICE); - } - } else { - talitos_sg_unmap(dev, edesc, src, dst); } } @@ -962,7 +945,8 @@ static void ipsec_esp_unmap(struct device *dev, unmap_single_talitos_ptr(dev, &edesc->desc.ptr[2], DMA_TO_DEVICE); unmap_single_talitos_ptr(dev, &edesc->desc.ptr[0], DMA_TO_DEVICE); - talitos_sg_unmap(dev, edesc, areq->src, areq->dst); + talitos_sg_unmap(dev, edesc, areq->src, areq->dst, areq->cryptlen, + areq->assoclen); if (edesc->dma_len) dma_unmap_single(dev, edesc->dma_link_tbl, edesc->dma_len, @@ -1110,99 +1094,37 @@ static inline int sg_to_link_tbl(struct scatterlist *sg, int sg_count, link_tbl_ptr); } -int map_sg_in_talitos_ptr(struct device *dev, struct scatterlist *src, - unsigned int len, struct talitos_edesc *edesc, - enum dma_data_direction dir, struct talitos_ptr *ptr) +int talitos_sg_map(struct device *dev, struct scatterlist *src, + unsigned int len, struct talitos_edesc *edesc, + struct talitos_ptr *ptr, + int sg_count, unsigned int offset, int tbl_off) { - int sg_count; struct talitos_private *priv = dev_get_drvdata(dev); bool is_sec1 = has_ftr_sec1(priv); to_talitos_ptr_len(ptr, len, is_sec1); + to_talitos_ptr_ext_set(ptr, 0, is_sec1); - if (is_sec1) { - sg_count = edesc->src_nents ? : 1; - - if (sg_count == 1) { - dma_map_sg(dev, src, 1, dir); - to_talitos_ptr(ptr, sg_dma_address(src), is_sec1); - } else { - sg_copy_to_buffer(src, sg_count, edesc->buf, len); - to_talitos_ptr(ptr, edesc->dma_link_tbl, is_sec1); - dma_sync_single_for_device(dev, edesc->dma_link_tbl, - len, DMA_TO_DEVICE); - } - } else { - to_talitos_ptr_ext_set(ptr, 0, is_sec1); - - sg_count = dma_map_sg(dev, src, edesc->src_nents ? : 1, dir); - - if (sg_count == 1) { - to_talitos_ptr(ptr, sg_dma_address(src), is_sec1); - } else { - sg_count = sg_to_link_tbl(src, sg_count, len, - &edesc->link_tbl[0]); - if (sg_count > 1) { - to_talitos_ptr(ptr, edesc->dma_link_tbl, 0); - to_talitos_ptr_ext_or(ptr, DESC_PTR_LNKTBL_JUMP, - 0); - dma_sync_single_for_device(dev, - edesc->dma_link_tbl, - edesc->dma_len, - DMA_BIDIRECTIONAL); - } else { - /* Only one segment now, so no link tbl needed*/ - to_talitos_ptr(ptr, sg_dma_address(src), - is_sec1); - } - } + if (sg_count == 1) { + to_talitos_ptr(ptr, sg_dma_address(src) + offset, is_sec1); + return sg_count; } - return sg_count; -} - -void map_sg_out_talitos_ptr(struct device *dev, struct scatterlist *dst, - unsigned int len, struct talitos_edesc *edesc, - enum dma_data_direction dir, - struct talitos_ptr *ptr, int sg_count) -{ - struct talitos_private *priv = dev_get_drvdata(dev); - bool is_sec1 = has_ftr_sec1(priv); - - if (dir != DMA_NONE) - sg_count = dma_map_sg(dev, dst, edesc->dst_nents ? : 1, dir); - - to_talitos_ptr_len(ptr, len, is_sec1); - if (is_sec1) { - if (sg_count == 1) { - if (dir != DMA_NONE) - dma_map_sg(dev, dst, 1, dir); - to_talitos_ptr(ptr, sg_dma_address(dst), is_sec1); - } else { - to_talitos_ptr(ptr, edesc->dma_link_tbl + len, is_sec1); - dma_sync_single_for_device(dev, - edesc->dma_link_tbl + len, - len, DMA_FROM_DEVICE); - } - } else { - to_talitos_ptr_ext_set(ptr, 0, is_sec1); - - if (sg_count == 1) { - to_talitos_ptr(ptr, sg_dma_address(dst), is_sec1); - } else { - struct talitos_ptr *link_tbl_ptr = - &edesc->link_tbl[edesc->src_nents + 1]; - - to_talitos_ptr(ptr, edesc->dma_link_tbl + - (edesc->src_nents + 1) * - sizeof(struct talitos_ptr), 0); - to_talitos_ptr_ext_or(ptr, DESC_PTR_LNKTBL_JUMP, 0); - sg_to_link_tbl(dst, sg_count, len, link_tbl_ptr); - dma_sync_single_for_device(dev, edesc->dma_link_tbl, - edesc->dma_len, - DMA_BIDIRECTIONAL); - } + to_talitos_ptr(ptr, edesc->dma_link_tbl + offset, is_sec1); + return sg_count; } + sg_count = sg_to_link_tbl_offset(src, sg_count, offset, len, + &edesc->link_tbl[tbl_off]); + if (sg_count == 1) { + /* Only one segment now, so no link tbl needed*/ + copy_talitos_ptr(ptr, &edesc->link_tbl[tbl_off], is_sec1); + return sg_count; + } + to_talitos_ptr(ptr, edesc->dma_link_tbl + + tbl_off * sizeof(struct talitos_ptr), is_sec1); + to_talitos_ptr_ext_or(ptr, DESC_PTR_LNKTBL_JUMP, is_sec1); + + return sg_count; } /* @@ -1363,7 +1285,7 @@ static struct talitos_edesc *talitos_edesc_alloc(struct device *dev, bool encrypt) { struct talitos_edesc *edesc; - int src_nents, dst_nents, alloc_len, dma_len; + int src_nents, dst_nents, alloc_len, dma_len, src_len, dst_len; dma_addr_t iv_dma = 0; gfp_t flags = cryptoflags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL : GFP_ATOMIC; @@ -1381,8 +1303,8 @@ static struct talitos_edesc *talitos_edesc_alloc(struct device *dev, iv_dma = dma_map_single(dev, iv, ivsize, DMA_TO_DEVICE); if (!dst || dst == src) { - src_nents = sg_nents_for_len(src, - assoclen + cryptlen + authsize); + src_len = assoclen + cryptlen + authsize; + src_nents = sg_nents_for_len(src, src_len); if (src_nents < 0) { dev_err(dev, "Invalid number of src SG.\n"); err = ERR_PTR(-EINVAL); @@ -1390,17 +1312,18 @@ static struct talitos_edesc *talitos_edesc_alloc(struct device *dev, } src_nents = (src_nents == 1) ? 0 : src_nents; dst_nents = dst ? src_nents : 0; + dst_len = 0; } else { /* dst && dst != src*/ - src_nents = sg_nents_for_len(src, assoclen + cryptlen + - (encrypt ? 0 : authsize)); + src_len = assoclen + cryptlen + (encrypt ? 0 : authsize); + src_nents = sg_nents_for_len(src, src_len); if (src_nents < 0) { dev_err(dev, "Invalid number of src SG.\n"); err = ERR_PTR(-EINVAL); goto error_sg; } src_nents = (src_nents == 1) ? 0 : src_nents; - dst_nents = sg_nents_for_len(dst, assoclen + cryptlen + - (encrypt ? authsize : 0)); + dst_len = assoclen + cryptlen + (encrypt ? authsize : 0); + dst_nents = sg_nents_for_len(dst, dst_len); if (dst_nents < 0) { dev_err(dev, "Invalid number of dst SG.\n"); err = ERR_PTR(-EINVAL); @@ -1417,8 +1340,8 @@ static struct talitos_edesc *talitos_edesc_alloc(struct device *dev, alloc_len = sizeof(struct talitos_edesc); if (src_nents || dst_nents) { if (is_sec1) - dma_len = (src_nents ? cryptlen : 0) + - (dst_nents ? cryptlen : 0); + dma_len = (src_nents ? src_len : 0) + + (dst_nents ? dst_len : 0); else dma_len = (src_nents + dst_nents + 2) * sizeof(struct talitos_ptr) + authsize * 2; @@ -1548,7 +1471,7 @@ static void common_nonsnoop_unmap(struct device *dev, { unmap_single_talitos_ptr(dev, &edesc->desc.ptr[5], DMA_FROM_DEVICE); - unmap_sg_talitos_ptr(dev, areq->src, areq->dst, areq->nbytes, edesc); + talitos_sg_unmap(dev, edesc, areq->src, areq->dst, areq->nbytes, 0); unmap_single_talitos_ptr(dev, &edesc->desc.ptr[2], DMA_TO_DEVICE); unmap_single_talitos_ptr(dev, &edesc->desc.ptr[1], DMA_TO_DEVICE); @@ -1586,6 +1509,7 @@ static int common_nonsnoop(struct talitos_edesc *edesc, unsigned int cryptlen = areq->nbytes; unsigned int ivsize = crypto_ablkcipher_ivsize(cipher); int sg_count, ret; + bool sync_needed = false; struct talitos_private *priv = dev_get_drvdata(dev); bool is_sec1 = has_ftr_sec1(priv); @@ -1601,19 +1525,33 @@ static int common_nonsnoop(struct talitos_edesc *edesc, map_single_talitos_ptr(dev, &desc->ptr[2], ctx->keylen, (char *)&ctx->key, DMA_TO_DEVICE); + sg_count = edesc->src_nents ?: 1; + if (is_sec1 && sg_count > 1) + sg_copy_to_buffer(areq->src, sg_count, edesc->buf, + cryptlen); + else + sg_count = dma_map_sg(dev, areq->src, sg_count, + (areq->src == areq->dst) ? + DMA_BIDIRECTIONAL : DMA_TO_DEVICE); /* * cipher in */ - sg_count = map_sg_in_talitos_ptr(dev, areq->src, cryptlen, edesc, - (areq->src == areq->dst) ? - DMA_BIDIRECTIONAL : DMA_TO_DEVICE, - &desc->ptr[3]); + sg_count = talitos_sg_map(dev, areq->src, cryptlen, edesc, + &desc->ptr[3], sg_count, 0, 0); + if (sg_count > 1) + sync_needed = true; /* cipher out */ - map_sg_out_talitos_ptr(dev, areq->dst, cryptlen, edesc, - (areq->src == areq->dst) ? DMA_NONE - : DMA_FROM_DEVICE, - &desc->ptr[4], sg_count); + if (areq->src != areq->dst) { + sg_count = edesc->dst_nents ? : 1; + if (!is_sec1 || sg_count == 1) + dma_map_sg(dev, areq->dst, sg_count, DMA_FROM_DEVICE); + } + + ret = talitos_sg_map(dev, areq->dst, cryptlen, edesc, &desc->ptr[4], + sg_count, 0, (edesc->src_nents + 1)); + if (ret > 1) + sync_needed = true; /* iv out */ map_single_talitos_ptr(dev, &desc->ptr[5], ivsize, ctx->iv, @@ -1622,6 +1560,10 @@ static int common_nonsnoop(struct talitos_edesc *edesc, /* last DWORD empty */ desc->ptr[6] = zero_entry; + if (sync_needed) + dma_sync_single_for_device(dev, edesc->dma_link_tbl, + edesc->dma_len, DMA_BIDIRECTIONAL); + ret = talitos_submit(dev, ctx->ch, desc, callback, areq); if (ret != -EINPROGRESS) { common_nonsnoop_unmap(dev, edesc, areq); @@ -1685,7 +1627,7 @@ static void common_nonsnoop_hash_unmap(struct device *dev, unmap_single_talitos_ptr(dev, &edesc->desc.ptr[5], DMA_FROM_DEVICE); - unmap_sg_talitos_ptr(dev, req_ctx->psrc, NULL, 0, edesc); + talitos_sg_unmap(dev, edesc, req_ctx->psrc, NULL, 0, 0); /* When using hashctx-in, must unmap it. */ if (from_talitos_ptr_len(&edesc->desc.ptr[1], is_sec1)) @@ -1756,8 +1698,10 @@ static int common_nonsnoop_hash(struct talitos_edesc *edesc, struct device *dev = ctx->dev; struct talitos_desc *desc = &edesc->desc; int ret; + bool sync_needed = false; struct talitos_private *priv = dev_get_drvdata(dev); bool is_sec1 = has_ftr_sec1(priv); + int sg_count; /* first DWORD empty */ desc->ptr[0] = zero_entry; @@ -1782,11 +1726,19 @@ static int common_nonsnoop_hash(struct talitos_edesc *edesc, else desc->ptr[2] = zero_entry; + sg_count = edesc->src_nents ?: 1; + if (is_sec1 && sg_count > 1) + sg_copy_to_buffer(areq->src, sg_count, edesc->buf, length); + else + sg_count = dma_map_sg(dev, req_ctx->psrc, sg_count, + DMA_TO_DEVICE); /* * data in */ - map_sg_in_talitos_ptr(dev, req_ctx->psrc, length, edesc, - DMA_TO_DEVICE, &desc->ptr[3]); + sg_count = talitos_sg_map(dev, req_ctx->psrc, length, edesc, + &desc->ptr[3], sg_count, 0, 0); + if (sg_count > 1) + sync_needed = true; /* fifth DWORD empty */ desc->ptr[4] = zero_entry; @@ -1807,6 +1759,10 @@ static int common_nonsnoop_hash(struct talitos_edesc *edesc, if (is_sec1 && from_talitos_ptr_len(&desc->ptr[3], true) == 0) talitos_handle_buggy_hash(ctx, edesc, &desc->ptr[3]); + if (sync_needed) + dma_sync_single_for_device(dev, edesc->dma_link_tbl, + edesc->dma_len, DMA_BIDIRECTIONAL); + ret = talitos_submit(dev, ctx->ch, desc, callback, areq); if (ret != -EINPROGRESS) { common_nonsnoop_hash_unmap(dev, edesc, areq); -- cgit v1.2.3 From 549bd8bc598759607b5ffc2eb052a3c99a85552e Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Mon, 6 Jun 2016 13:20:40 +0200 Subject: crypto: talitos - Implement AEAD for SEC1 using HMAC_SNOOP_NO_AFEU This patchs enhances the IPSEC_ESP related functions for them to also supports the same operations with descriptor type HMAC_SNOOP_NO_AFEU. The differences between the two descriptor types are: * pointeurs 2 and 3 are swaped (Confidentiality key and Primary EU Context IN) * HMAC_SNOOP_NO_AFEU has CICV out in pointer 6 * HMAC_SNOOP_NO_AFEU has no primary EU context out so we get it from the end of data out Signed-off-by: Christophe Leroy Signed-off-by: Herbert Xu --- drivers/crypto/talitos.c | 209 ++++++++++++++++++++++++++++------------------- 1 file changed, 124 insertions(+), 85 deletions(-) (limited to 'drivers/crypto') diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index b0d3c24f7f79..4ff03c3f62cd 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -940,7 +940,13 @@ static void ipsec_esp_unmap(struct device *dev, struct talitos_edesc *edesc, struct aead_request *areq) { - unmap_single_talitos_ptr(dev, &edesc->desc.ptr[6], DMA_FROM_DEVICE); + struct crypto_aead *aead = crypto_aead_reqtfm(areq); + struct talitos_ctx *ctx = crypto_aead_ctx(aead); + unsigned int ivsize = crypto_aead_ivsize(aead); + + if (edesc->desc.hdr & DESC_HDR_TYPE_IPSEC_ESP) + unmap_single_talitos_ptr(dev, &edesc->desc.ptr[6], + DMA_FROM_DEVICE); unmap_single_talitos_ptr(dev, &edesc->desc.ptr[3], DMA_TO_DEVICE); unmap_single_talitos_ptr(dev, &edesc->desc.ptr[2], DMA_TO_DEVICE); unmap_single_talitos_ptr(dev, &edesc->desc.ptr[0], DMA_TO_DEVICE); @@ -951,6 +957,13 @@ static void ipsec_esp_unmap(struct device *dev, if (edesc->dma_len) dma_unmap_single(dev, edesc->dma_link_tbl, edesc->dma_len, DMA_BIDIRECTIONAL); + + if (!(edesc->desc.hdr & DESC_HDR_TYPE_IPSEC_ESP)) { + unsigned int dst_nents = edesc->dst_nents ? : 1; + + sg_pcopy_to_buffer(areq->dst, dst_nents, ctx->iv, ivsize, + areq->assoclen + areq->cryptlen - ivsize); + } } /* @@ -960,6 +973,8 @@ static void ipsec_esp_encrypt_done(struct device *dev, struct talitos_desc *desc, void *context, int err) { + struct talitos_private *priv = dev_get_drvdata(dev); + bool is_sec1 = has_ftr_sec1(priv); struct aead_request *areq = context; struct crypto_aead *authenc = crypto_aead_reqtfm(areq); unsigned int authsize = crypto_aead_authsize(authenc); @@ -973,8 +988,11 @@ static void ipsec_esp_encrypt_done(struct device *dev, /* copy the generated ICV to dst */ if (edesc->icv_ool) { - icvdata = &edesc->link_tbl[edesc->src_nents + - edesc->dst_nents + 2]; + if (is_sec1) + icvdata = edesc->buf + areq->assoclen + areq->cryptlen; + else + icvdata = &edesc->link_tbl[edesc->src_nents + + edesc->dst_nents + 2]; sg = sg_last(areq->dst, edesc->dst_nents); memcpy((char *)sg_virt(sg) + sg->length - authsize, icvdata, authsize); @@ -995,6 +1013,8 @@ static void ipsec_esp_decrypt_swauth_done(struct device *dev, struct talitos_edesc *edesc; struct scatterlist *sg; char *oicv, *icv; + struct talitos_private *priv = dev_get_drvdata(dev); + bool is_sec1 = has_ftr_sec1(priv); edesc = container_of(desc, struct talitos_edesc, desc); @@ -1006,7 +1026,12 @@ static void ipsec_esp_decrypt_swauth_done(struct device *dev, icv = (char *)sg_virt(sg) + sg->length - authsize; if (edesc->dma_len) { - oicv = (char *)&edesc->link_tbl[edesc->src_nents + + if (is_sec1) + oicv = (char *)&edesc->dma_link_tbl + + req->assoclen + req->cryptlen; + else + oicv = (char *) + &edesc->link_tbl[edesc->src_nents + edesc->dst_nents + 2]; if (edesc->icv_ool) icv = oicv + authsize; @@ -1145,42 +1170,52 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, int tbl_off = 0; int sg_count, ret; int sg_link_tbl_len; + bool sync_needed = false; + struct talitos_private *priv = dev_get_drvdata(dev); + bool is_sec1 = has_ftr_sec1(priv); /* hmac key */ map_single_talitos_ptr(dev, &desc->ptr[0], ctx->authkeylen, &ctx->key, DMA_TO_DEVICE); - sg_count = dma_map_sg(dev, areq->src, edesc->src_nents ?: 1, - (areq->src == areq->dst) ? DMA_BIDIRECTIONAL - : DMA_TO_DEVICE); - /* hmac data */ - to_talitos_ptr_len(&desc->ptr[1], areq->assoclen, 0); - if (sg_count > 1 && - (ret = sg_to_link_tbl_offset(areq->src, sg_count, 0, - areq->assoclen, - &edesc->link_tbl[tbl_off])) > 1) { - to_talitos_ptr(&desc->ptr[1], edesc->dma_link_tbl + tbl_off * - sizeof(struct talitos_ptr), 0); - to_talitos_ptr_ext_set(&desc->ptr[1], DESC_PTR_LNKTBL_JUMP, 0); + sg_count = edesc->src_nents ?: 1; + if (is_sec1 && sg_count > 1) + sg_copy_to_buffer(areq->src, sg_count, edesc->buf, + areq->assoclen + cryptlen); + else + sg_count = dma_map_sg(dev, areq->src, sg_count, + (areq->src == areq->dst) ? + DMA_BIDIRECTIONAL : DMA_TO_DEVICE); - dma_sync_single_for_device(dev, edesc->dma_link_tbl, - edesc->dma_len, DMA_BIDIRECTIONAL); + /* hmac data */ + ret = talitos_sg_map(dev, areq->src, areq->assoclen, edesc, + &desc->ptr[1], sg_count, 0, tbl_off); + if (ret > 1) { tbl_off += ret; - } else { - to_talitos_ptr(&desc->ptr[1], sg_dma_address(areq->src), 0); - to_talitos_ptr_ext_set(&desc->ptr[1], 0, 0); + sync_needed = true; } /* cipher iv */ - to_talitos_ptr(&desc->ptr[2], edesc->iv_dma, 0); - to_talitos_ptr_len(&desc->ptr[2], ivsize, 0); - to_talitos_ptr_ext_set(&desc->ptr[2], 0, 0); + if (desc->hdr & DESC_HDR_TYPE_IPSEC_ESP) { + to_talitos_ptr(&desc->ptr[2], edesc->iv_dma, is_sec1); + to_talitos_ptr_len(&desc->ptr[2], ivsize, is_sec1); + to_talitos_ptr_ext_set(&desc->ptr[2], 0, is_sec1); + } else { + to_talitos_ptr(&desc->ptr[3], edesc->iv_dma, is_sec1); + to_talitos_ptr_len(&desc->ptr[3], ivsize, is_sec1); + to_talitos_ptr_ext_set(&desc->ptr[3], 0, is_sec1); + } /* cipher key */ - map_single_talitos_ptr(dev, &desc->ptr[3], ctx->enckeylen, - (char *)&ctx->key + ctx->authkeylen, - DMA_TO_DEVICE); + if (desc->hdr & DESC_HDR_TYPE_IPSEC_ESP) + map_single_talitos_ptr(dev, &desc->ptr[3], ctx->enckeylen, + (char *)&ctx->key + ctx->authkeylen, + DMA_TO_DEVICE); + else + map_single_talitos_ptr(dev, &desc->ptr[2], ctx->enckeylen, + (char *)&ctx->key + ctx->authkeylen, + DMA_TO_DEVICE); /* * cipher in @@ -1188,78 +1223,82 @@ static int ipsec_esp(struct talitos_edesc *edesc, struct aead_request *areq, * extent is bytes of HMAC postpended to ciphertext, * typically 12 for ipsec */ - to_talitos_ptr_len(&desc->ptr[4], cryptlen, 0); - to_talitos_ptr_ext_set(&desc->ptr[4], authsize, 0); + to_talitos_ptr_len(&desc->ptr[4], cryptlen, is_sec1); + to_talitos_ptr_ext_set(&desc->ptr[4], 0, is_sec1); sg_link_tbl_len = cryptlen; - if (edesc->desc.hdr & DESC_HDR_MODE1_MDEU_CICV) - sg_link_tbl_len += authsize; - if (sg_count == 1) { - to_talitos_ptr(&desc->ptr[4], sg_dma_address(areq->src) + - areq->assoclen, 0); - } else if ((ret = sg_to_link_tbl_offset(areq->src, sg_count, - areq->assoclen, sg_link_tbl_len, - &edesc->link_tbl[tbl_off])) > - 1) { - to_talitos_ptr_ext_or(&desc->ptr[4], DESC_PTR_LNKTBL_JUMP, 0); - to_talitos_ptr(&desc->ptr[4], edesc->dma_link_tbl + - tbl_off * - sizeof(struct talitos_ptr), 0); - dma_sync_single_for_device(dev, edesc->dma_link_tbl, - edesc->dma_len, - DMA_BIDIRECTIONAL); - tbl_off += ret; - } else { - copy_talitos_ptr(&desc->ptr[4], &edesc->link_tbl[tbl_off], 0); + if (desc->hdr & DESC_HDR_TYPE_IPSEC_ESP) { + to_talitos_ptr_ext_set(&desc->ptr[4], authsize, is_sec1); + + if (edesc->desc.hdr & DESC_HDR_MODE1_MDEU_CICV) + sg_link_tbl_len += authsize; } - /* cipher out */ - to_talitos_ptr_len(&desc->ptr[5], cryptlen, 0); - to_talitos_ptr_ext_set(&desc->ptr[5], authsize, 0); + sg_count = talitos_sg_map(dev, areq->src, cryptlen, edesc, + &desc->ptr[4], sg_count, areq->assoclen, + tbl_off); + + if (sg_count > 1) { + tbl_off += sg_count; + sync_needed = true; + } - if (areq->src != areq->dst) - sg_count = dma_map_sg(dev, areq->dst, edesc->dst_nents ? : 1, - DMA_FROM_DEVICE); + /* cipher out */ + if (areq->src != areq->dst) { + sg_count = edesc->dst_nents ? : 1; + if (!is_sec1 || sg_count == 1) + dma_map_sg(dev, areq->dst, sg_count, DMA_FROM_DEVICE); + } - edesc->icv_ool = false; + sg_count = talitos_sg_map(dev, areq->dst, cryptlen, edesc, + &desc->ptr[5], sg_count, areq->assoclen, + tbl_off); - if (sg_count == 1) { - to_talitos_ptr(&desc->ptr[5], sg_dma_address(areq->dst) + - areq->assoclen, 0); - } else if ((sg_count = - sg_to_link_tbl_offset(areq->dst, sg_count, - areq->assoclen, cryptlen, - &edesc->link_tbl[tbl_off])) > 1) { - struct talitos_ptr *tbl_ptr = &edesc->link_tbl[tbl_off]; - - to_talitos_ptr(&desc->ptr[5], edesc->dma_link_tbl + - tbl_off * sizeof(struct talitos_ptr), 0); - - /* Add an entry to the link table for ICV data */ - tbl_ptr += sg_count - 1; - to_talitos_ptr_ext_set(tbl_ptr, 0, 0); - tbl_ptr++; - to_talitos_ptr_ext_set(tbl_ptr, DESC_PTR_LNKTBL_RETURN, 0); - to_talitos_ptr_len(tbl_ptr, authsize, 0); - - /* icv data follows link tables */ - to_talitos_ptr(tbl_ptr, edesc->dma_link_tbl + - (edesc->src_nents + edesc->dst_nents + - 2) * sizeof(struct talitos_ptr) + - authsize, 0); - to_talitos_ptr_ext_or(&desc->ptr[5], DESC_PTR_LNKTBL_JUMP, 0); - dma_sync_single_for_device(ctx->dev, edesc->dma_link_tbl, - edesc->dma_len, DMA_BIDIRECTIONAL); + if (desc->hdr & DESC_HDR_TYPE_IPSEC_ESP) + to_talitos_ptr_ext_or(&desc->ptr[5], authsize, is_sec1); + if (sg_count > 1) { edesc->icv_ool = true; + sync_needed = true; + + if (desc->hdr & DESC_HDR_TYPE_IPSEC_ESP) { + struct talitos_ptr *tbl_ptr = &edesc->link_tbl[tbl_off]; + int offset = (edesc->src_nents + edesc->dst_nents + 2) * + sizeof(struct talitos_ptr) + authsize; + + /* Add an entry to the link table for ICV data */ + tbl_ptr += sg_count - 1; + to_talitos_ptr_ext_set(tbl_ptr, 0, is_sec1); + tbl_ptr++; + to_talitos_ptr_ext_set(tbl_ptr, DESC_PTR_LNKTBL_RETURN, + is_sec1); + to_talitos_ptr_len(tbl_ptr, authsize, is_sec1); + + /* icv data follows link tables */ + to_talitos_ptr(tbl_ptr, edesc->dma_link_tbl + offset, + is_sec1); + } } else { - copy_talitos_ptr(&desc->ptr[5], &edesc->link_tbl[tbl_off], 0); + edesc->icv_ool = false; + } + + /* ICV data */ + if (!(desc->hdr & DESC_HDR_TYPE_IPSEC_ESP)) { + to_talitos_ptr_len(&desc->ptr[6], authsize, is_sec1); + to_talitos_ptr(&desc->ptr[6], edesc->dma_link_tbl + + areq->assoclen + cryptlen, is_sec1); } /* iv out */ - map_single_talitos_ptr(dev, &desc->ptr[6], ivsize, ctx->iv, - DMA_FROM_DEVICE); + if (desc->hdr & DESC_HDR_TYPE_IPSEC_ESP) + map_single_talitos_ptr(dev, &desc->ptr[6], ivsize, ctx->iv, + DMA_FROM_DEVICE); + + if (sync_needed) + dma_sync_single_for_device(dev, edesc->dma_link_tbl, + edesc->dma_len, + DMA_BIDIRECTIONAL); ret = talitos_submit(dev, ctx->ch, desc, callback, areq); if (ret != -EINPROGRESS) { -- cgit v1.2.3 From e968b49f4d57a28d5e6ada6e92d4937ca0743793 Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Mon, 6 Jun 2016 13:20:42 +0200 Subject: crypto: talitos - sg_to_link_tbl() not used anymore, remove it Signed-off-by: Christophe Leroy Signed-off-by: Herbert Xu --- drivers/crypto/talitos.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'drivers/crypto') diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index 4ff03c3f62cd..ff8cf39dfc63 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -1111,14 +1111,6 @@ next: return count; } -static inline int sg_to_link_tbl(struct scatterlist *sg, int sg_count, - int cryptlen, - struct talitos_ptr *link_tbl_ptr) -{ - return sg_to_link_tbl_offset(sg, sg_count, 0, cryptlen, - link_tbl_ptr); -} - int talitos_sg_map(struct device *dev, struct scatterlist *src, unsigned int len, struct talitos_edesc *edesc, struct talitos_ptr *ptr, -- cgit v1.2.3 From b00577639d1970c8075d70f6180be83574712855 Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Mon, 6 Jun 2016 13:20:44 +0200 Subject: crypto: talitos - implement cra_priority SEC1 doesn't have IPSEC_ESP descriptor type but it is able to perform IPSEC using HMAC_SNOOP_NO_AFEU, which is also existing on SEC2 In order to be able to define descriptors templates for SEC1 without breaking SEC2+, we have to give lower priority to HMAC_SNOOP_NO_AFEU so that SEC2+ selects IPSEC_ESP and not HMAC_SNOOP_NO_AFEU which is less performant. This is done by adding a priority field in the template. If the field is 0, we use the default priority, otherwise we used the one in the field. Signed-off-by: Christophe Leroy Signed-off-by: Herbert Xu --- drivers/crypto/talitos.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'drivers/crypto') diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index ff8cf39dfc63..dfd3a93e4954 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -2120,6 +2120,7 @@ static int ahash_setkey(struct crypto_ahash *tfm, const u8 *key, struct talitos_alg_template { u32 type; + u32 priority; union { struct crypto_alg crypto; struct ahash_alg hash; @@ -2897,7 +2898,10 @@ static struct talitos_crypto_alg *talitos_alg_alloc(struct device *dev, } alg->cra_module = THIS_MODULE; - alg->cra_priority = TALITOS_CRA_PRIORITY; + if (t_alg->algt.priority) + alg->cra_priority = t_alg->algt.priority; + else + alg->cra_priority = TALITOS_CRA_PRIORITY; alg->cra_alignmask = 0; alg->cra_ctxsize = sizeof(struct talitos_ctx); alg->cra_flags |= CRYPTO_ALG_KERN_DRIVER_ONLY; -- cgit v1.2.3 From 7405c8d7ff979f5a5baa79006b8d2704e316f581 Mon Sep 17 00:00:00 2001 From: LEROY Christophe Date: Mon, 6 Jun 2016 13:20:46 +0200 Subject: crypto: talitos - templates for AEAD using HMAC_SNOOP_NO_AFEU This will allow IPSEC on SEC1 Signed-off-by: Christophe Leroy Signed-off-by: Herbert Xu --- drivers/crypto/talitos.c | 180 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 180 insertions(+) (limited to 'drivers/crypto') diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index dfd3a93e4954..0418a2f41dc0 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -811,6 +811,11 @@ static void talitos_unregister_rng(struct device *dev) * crypto alg */ #define TALITOS_CRA_PRIORITY 3000 +/* + * Defines a priority for doing AEAD with descriptors type + * HMAC_SNOOP_NO_AFEA (HSNA) instead of type IPSEC_ESP + */ +#define TALITOS_CRA_PRIORITY_AEAD_HSNA (TALITOS_CRA_PRIORITY - 1) #define TALITOS_MAX_KEY_SIZE 96 #define TALITOS_MAX_IV_LENGTH 16 /* max of AES_BLOCK_SIZE, DES3_EDE_BLOCK_SIZE */ @@ -2151,6 +2156,27 @@ static struct talitos_alg_template driver_algs[] = { DESC_HDR_MODE1_MDEU_PAD | DESC_HDR_MODE1_MDEU_SHA1_HMAC, }, + { .type = CRYPTO_ALG_TYPE_AEAD, + .priority = TALITOS_CRA_PRIORITY_AEAD_HSNA, + .alg.aead = { + .base = { + .cra_name = "authenc(hmac(sha1),cbc(aes))", + .cra_driver_name = "authenc-hmac-sha1-" + "cbc-aes-talitos", + .cra_blocksize = AES_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_ASYNC, + }, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = SHA1_DIGEST_SIZE, + }, + .desc_hdr_template = DESC_HDR_TYPE_HMAC_SNOOP_NO_AFEU | + DESC_HDR_SEL0_AESU | + DESC_HDR_MODE0_AESU_CBC | + DESC_HDR_SEL1_MDEUA | + DESC_HDR_MODE1_MDEU_INIT | + DESC_HDR_MODE1_MDEU_PAD | + DESC_HDR_MODE1_MDEU_SHA1_HMAC, + }, { .type = CRYPTO_ALG_TYPE_AEAD, .alg.aead = { .base = { @@ -2173,6 +2199,29 @@ static struct talitos_alg_template driver_algs[] = { DESC_HDR_MODE1_MDEU_PAD | DESC_HDR_MODE1_MDEU_SHA1_HMAC, }, + { .type = CRYPTO_ALG_TYPE_AEAD, + .priority = TALITOS_CRA_PRIORITY_AEAD_HSNA, + .alg.aead = { + .base = { + .cra_name = "authenc(hmac(sha1)," + "cbc(des3_ede))", + .cra_driver_name = "authenc-hmac-sha1-" + "cbc-3des-talitos", + .cra_blocksize = DES3_EDE_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_ASYNC, + }, + .ivsize = DES3_EDE_BLOCK_SIZE, + .maxauthsize = SHA1_DIGEST_SIZE, + }, + .desc_hdr_template = DESC_HDR_TYPE_HMAC_SNOOP_NO_AFEU | + DESC_HDR_SEL0_DEU | + DESC_HDR_MODE0_DEU_CBC | + DESC_HDR_MODE0_DEU_3DES | + DESC_HDR_SEL1_MDEUA | + DESC_HDR_MODE1_MDEU_INIT | + DESC_HDR_MODE1_MDEU_PAD | + DESC_HDR_MODE1_MDEU_SHA1_HMAC, + }, { .type = CRYPTO_ALG_TYPE_AEAD, .alg.aead = { .base = { @@ -2193,6 +2242,27 @@ static struct talitos_alg_template driver_algs[] = { DESC_HDR_MODE1_MDEU_PAD | DESC_HDR_MODE1_MDEU_SHA224_HMAC, }, + { .type = CRYPTO_ALG_TYPE_AEAD, + .priority = TALITOS_CRA_PRIORITY_AEAD_HSNA, + .alg.aead = { + .base = { + .cra_name = "authenc(hmac(sha224),cbc(aes))", + .cra_driver_name = "authenc-hmac-sha224-" + "cbc-aes-talitos", + .cra_blocksize = AES_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_ASYNC, + }, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = SHA224_DIGEST_SIZE, + }, + .desc_hdr_template = DESC_HDR_TYPE_HMAC_SNOOP_NO_AFEU | + DESC_HDR_SEL0_AESU | + DESC_HDR_MODE0_AESU_CBC | + DESC_HDR_SEL1_MDEUA | + DESC_HDR_MODE1_MDEU_INIT | + DESC_HDR_MODE1_MDEU_PAD | + DESC_HDR_MODE1_MDEU_SHA224_HMAC, + }, { .type = CRYPTO_ALG_TYPE_AEAD, .alg.aead = { .base = { @@ -2215,6 +2285,29 @@ static struct talitos_alg_template driver_algs[] = { DESC_HDR_MODE1_MDEU_PAD | DESC_HDR_MODE1_MDEU_SHA224_HMAC, }, + { .type = CRYPTO_ALG_TYPE_AEAD, + .priority = TALITOS_CRA_PRIORITY_AEAD_HSNA, + .alg.aead = { + .base = { + .cra_name = "authenc(hmac(sha224)," + "cbc(des3_ede))", + .cra_driver_name = "authenc-hmac-sha224-" + "cbc-3des-talitos", + .cra_blocksize = DES3_EDE_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_ASYNC, + }, + .ivsize = DES3_EDE_BLOCK_SIZE, + .maxauthsize = SHA224_DIGEST_SIZE, + }, + .desc_hdr_template = DESC_HDR_TYPE_HMAC_SNOOP_NO_AFEU | + DESC_HDR_SEL0_DEU | + DESC_HDR_MODE0_DEU_CBC | + DESC_HDR_MODE0_DEU_3DES | + DESC_HDR_SEL1_MDEUA | + DESC_HDR_MODE1_MDEU_INIT | + DESC_HDR_MODE1_MDEU_PAD | + DESC_HDR_MODE1_MDEU_SHA224_HMAC, + }, { .type = CRYPTO_ALG_TYPE_AEAD, .alg.aead = { .base = { @@ -2235,6 +2328,27 @@ static struct talitos_alg_template driver_algs[] = { DESC_HDR_MODE1_MDEU_PAD | DESC_HDR_MODE1_MDEU_SHA256_HMAC, }, + { .type = CRYPTO_ALG_TYPE_AEAD, + .priority = TALITOS_CRA_PRIORITY_AEAD_HSNA, + .alg.aead = { + .base = { + .cra_name = "authenc(hmac(sha256),cbc(aes))", + .cra_driver_name = "authenc-hmac-sha256-" + "cbc-aes-talitos", + .cra_blocksize = AES_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_ASYNC, + }, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = SHA256_DIGEST_SIZE, + }, + .desc_hdr_template = DESC_HDR_TYPE_HMAC_SNOOP_NO_AFEU | + DESC_HDR_SEL0_AESU | + DESC_HDR_MODE0_AESU_CBC | + DESC_HDR_SEL1_MDEUA | + DESC_HDR_MODE1_MDEU_INIT | + DESC_HDR_MODE1_MDEU_PAD | + DESC_HDR_MODE1_MDEU_SHA256_HMAC, + }, { .type = CRYPTO_ALG_TYPE_AEAD, .alg.aead = { .base = { @@ -2257,6 +2371,29 @@ static struct talitos_alg_template driver_algs[] = { DESC_HDR_MODE1_MDEU_PAD | DESC_HDR_MODE1_MDEU_SHA256_HMAC, }, + { .type = CRYPTO_ALG_TYPE_AEAD, + .priority = TALITOS_CRA_PRIORITY_AEAD_HSNA, + .alg.aead = { + .base = { + .cra_name = "authenc(hmac(sha256)," + "cbc(des3_ede))", + .cra_driver_name = "authenc-hmac-sha256-" + "cbc-3des-talitos", + .cra_blocksize = DES3_EDE_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_ASYNC, + }, + .ivsize = DES3_EDE_BLOCK_SIZE, + .maxauthsize = SHA256_DIGEST_SIZE, + }, + .desc_hdr_template = DESC_HDR_TYPE_HMAC_SNOOP_NO_AFEU | + DESC_HDR_SEL0_DEU | + DESC_HDR_MODE0_DEU_CBC | + DESC_HDR_MODE0_DEU_3DES | + DESC_HDR_SEL1_MDEUA | + DESC_HDR_MODE1_MDEU_INIT | + DESC_HDR_MODE1_MDEU_PAD | + DESC_HDR_MODE1_MDEU_SHA256_HMAC, + }, { .type = CRYPTO_ALG_TYPE_AEAD, .alg.aead = { .base = { @@ -2361,6 +2498,27 @@ static struct talitos_alg_template driver_algs[] = { DESC_HDR_MODE1_MDEU_PAD | DESC_HDR_MODE1_MDEU_MD5_HMAC, }, + { .type = CRYPTO_ALG_TYPE_AEAD, + .priority = TALITOS_CRA_PRIORITY_AEAD_HSNA, + .alg.aead = { + .base = { + .cra_name = "authenc(hmac(md5),cbc(aes))", + .cra_driver_name = "authenc-hmac-md5-" + "cbc-aes-talitos", + .cra_blocksize = AES_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_ASYNC, + }, + .ivsize = AES_BLOCK_SIZE, + .maxauthsize = MD5_DIGEST_SIZE, + }, + .desc_hdr_template = DESC_HDR_TYPE_HMAC_SNOOP_NO_AFEU | + DESC_HDR_SEL0_AESU | + DESC_HDR_MODE0_AESU_CBC | + DESC_HDR_SEL1_MDEUA | + DESC_HDR_MODE1_MDEU_INIT | + DESC_HDR_MODE1_MDEU_PAD | + DESC_HDR_MODE1_MDEU_MD5_HMAC, + }, { .type = CRYPTO_ALG_TYPE_AEAD, .alg.aead = { .base = { @@ -2382,6 +2540,28 @@ static struct talitos_alg_template driver_algs[] = { DESC_HDR_MODE1_MDEU_PAD | DESC_HDR_MODE1_MDEU_MD5_HMAC, }, + { .type = CRYPTO_ALG_TYPE_AEAD, + .priority = TALITOS_CRA_PRIORITY_AEAD_HSNA, + .alg.aead = { + .base = { + .cra_name = "authenc(hmac(md5),cbc(des3_ede))", + .cra_driver_name = "authenc-hmac-md5-" + "cbc-3des-talitos", + .cra_blocksize = DES3_EDE_BLOCK_SIZE, + .cra_flags = CRYPTO_ALG_ASYNC, + }, + .ivsize = DES3_EDE_BLOCK_SIZE, + .maxauthsize = MD5_DIGEST_SIZE, + }, + .desc_hdr_template = DESC_HDR_TYPE_HMAC_SNOOP_NO_AFEU | + DESC_HDR_SEL0_DEU | + DESC_HDR_MODE0_DEU_CBC | + DESC_HDR_MODE0_DEU_3DES | + DESC_HDR_SEL1_MDEUA | + DESC_HDR_MODE1_MDEU_INIT | + DESC_HDR_MODE1_MDEU_PAD | + DESC_HDR_MODE1_MDEU_MD5_HMAC, + }, /* ABLKCIPHER algorithms. */ { .type = CRYPTO_ALG_TYPE_ABLKCIPHER, .alg.crypto = { -- cgit v1.2.3 From 773b197972bebcbd9eb3cd4d2688a68619eef85c Mon Sep 17 00:00:00 2001 From: Bhaktipriya Shridhar Date: Wed, 8 Jun 2016 02:47:47 +0530 Subject: crypto: qat - Remove deprecated create_workqueue alloc_workqueue replaces deprecated create_workqueue(). The workqueue device_reset_wq has workitem &reset_data->reset_work per adf_reset_dev_data. The workqueue pf2vf_resp_wq is a workqueue for PF2VF responses has workitem &pf2vf_resp->pf2vf_resp_work per pf2vf_resp. The workqueue adf_vf_stop_wq is used to call adf_dev_stop() asynchronously. Dedicated workqueues have been used in all cases since the workitems on the workqueues are involved in operation of crypto which can be used in the IO path which is depended upon during memory reclaim. Hence, WQ_MEM_RECLAIM has been set to gurantee forward progress under memory pressure. Since there are only a fixed number of work items, explicit concurrency limit is unnecessary. Signed-off-by: Bhaktipriya Shridhar Acked-by: Tejun Heo Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_common/adf_aer.c | 3 ++- drivers/crypto/qat/qat_common/adf_sriov.c | 2 +- drivers/crypto/qat/qat_common/adf_vf_isr.c | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) (limited to 'drivers/crypto') diff --git a/drivers/crypto/qat/qat_common/adf_aer.c b/drivers/crypto/qat/qat_common/adf_aer.c index b40d9c8dad96..7bfb57f1bcb5 100644 --- a/drivers/crypto/qat/qat_common/adf_aer.c +++ b/drivers/crypto/qat/qat_common/adf_aer.c @@ -243,7 +243,8 @@ EXPORT_SYMBOL_GPL(adf_disable_aer); int adf_init_aer(void) { - device_reset_wq = create_workqueue("qat_device_reset_wq"); + device_reset_wq = alloc_workqueue("qat_device_reset_wq", + WQ_MEM_RECLAIM, 0); return !device_reset_wq ? -EFAULT : 0; } diff --git a/drivers/crypto/qat/qat_common/adf_sriov.c b/drivers/crypto/qat/qat_common/adf_sriov.c index 4a526e2f1d7f..9320ae1d005b 100644 --- a/drivers/crypto/qat/qat_common/adf_sriov.c +++ b/drivers/crypto/qat/qat_common/adf_sriov.c @@ -292,7 +292,7 @@ EXPORT_SYMBOL_GPL(adf_sriov_configure); int __init adf_init_pf_wq(void) { /* Workqueue for PF2VF responses */ - pf2vf_resp_wq = create_workqueue("qat_pf2vf_resp_wq"); + pf2vf_resp_wq = alloc_workqueue("qat_pf2vf_resp_wq", WQ_MEM_RECLAIM, 0); return !pf2vf_resp_wq ? -ENOMEM : 0; } diff --git a/drivers/crypto/qat/qat_common/adf_vf_isr.c b/drivers/crypto/qat/qat_common/adf_vf_isr.c index aa689cabedb4..bf99e11a3403 100644 --- a/drivers/crypto/qat/qat_common/adf_vf_isr.c +++ b/drivers/crypto/qat/qat_common/adf_vf_isr.c @@ -321,7 +321,7 @@ EXPORT_SYMBOL_GPL(adf_vf_isr_resource_alloc); int __init adf_init_vf_wq(void) { - adf_vf_stop_wq = create_workqueue("adf_vf_stop_wq"); + adf_vf_stop_wq = alloc_workqueue("adf_vf_stop_wq", WQ_MEM_RECLAIM, 0); return !adf_vf_stop_wq ? -EFAULT : 0; } -- cgit v1.2.3 From a6ed42dac49bfd6cb12e2dbffcee1c6d0854bd52 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 16 Jun 2016 11:05:46 +0200 Subject: crypto: caam - fix misspelled upper_32_bits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit An endianess fix mistakenly used higher_32_bits() instead of upper_32_bits(), and that doesn't exist: drivers/crypto/caam/desc_constr.h: In function 'append_ptr': drivers/crypto/caam/desc_constr.h:84:75: error: implicit declaration of function 'higher_32_bits' [-Werror=implicit-function-declaration] *offset = cpu_to_caam_dma(ptr); Signed-off-by: Arnd Bergmann Fixes: 261ea058f016 ("crypto: caam - handle core endianness != caam endianness") Reviewed-by: Horia Geantă Signed-off-by: Herbert Xu --- drivers/crypto/caam/regs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/crypto') diff --git a/drivers/crypto/caam/regs.h b/drivers/crypto/caam/regs.h index 8c766cf9202c..b3c5016f6458 100644 --- a/drivers/crypto/caam/regs.h +++ b/drivers/crypto/caam/regs.h @@ -183,10 +183,10 @@ static inline u64 rd_reg64(void __iomem *reg) #ifdef CONFIG_SOC_IMX7D #define cpu_to_caam_dma(value) \ (((u64)cpu_to_caam32(lower_32_bits(value)) << 32) | \ - (u64)cpu_to_caam32(higher_32_bits(value))) + (u64)cpu_to_caam32(upper_32_bits(value))) #define caam_dma_to_cpu(value) \ (((u64)caam32_to_cpu(lower_32_bits(value)) << 32) | \ - (u64)caam32_to_cpu(higher_32_bits(value))) + (u64)caam32_to_cpu(upper_32_bits(value))) #else #define cpu_to_caam_dma(value) cpu_to_caam64(value) #define caam_dma_to_cpu(value) caam64_to_cpu(value) -- cgit v1.2.3 From 6999d504d4be3ddacd7a01ae961886a66b6a53d9 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Thu, 16 Jun 2016 16:32:55 +0300 Subject: crypto: caam - replace deprecated EXTRA_CFLAGS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit EXTRA_CFLAGS is still supported but its usage is deprecated. Signed-off-by: Tudor Ambarus Reviewed-by: Horia Geantă Signed-off-by: Herbert Xu --- drivers/crypto/caam/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/crypto') diff --git a/drivers/crypto/caam/Makefile b/drivers/crypto/caam/Makefile index 550758a333e7..3904700ef110 100644 --- a/drivers/crypto/caam/Makefile +++ b/drivers/crypto/caam/Makefile @@ -2,7 +2,7 @@ # Makefile for the CAAM backend and dependent components # ifeq ($(CONFIG_CRYPTO_DEV_FSL_CAAM_DEBUG), y) - EXTRA_CFLAGS := -DDEBUG + ccflags-y := -DDEBUG endif obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM) += caam.o -- cgit v1.2.3 From e26df73f80290efe5c772d94ac3a3cf2b077fe5b Mon Sep 17 00:00:00 2001 From: Romain Perier Date: Tue, 21 Jun 2016 10:08:31 +0200 Subject: crypto: marvell - Add a macro constant for the size of the crypto queue Adding a macro constant to be used for the size of the crypto queue, instead of using a numeric value directly. It will be easier to maintain in case we add more than one crypto queue of the same size. Signed-off-by: Romain Perier Acked-by: Boris Brezillon Signed-off-by: Herbert Xu --- drivers/crypto/marvell/cesa.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers/crypto') diff --git a/drivers/crypto/marvell/cesa.c b/drivers/crypto/marvell/cesa.c index e8ef9fd24a16..47552c12e8c6 100644 --- a/drivers/crypto/marvell/cesa.c +++ b/drivers/crypto/marvell/cesa.c @@ -31,6 +31,9 @@ #include "cesa.h" +/* Limit of the crypto queue before reaching the backlog */ +#define CESA_CRYPTO_DEFAULT_MAX_QLEN 50 + static int allhwsupport = !IS_ENABLED(CONFIG_CRYPTO_DEV_MV_CESA); module_param_named(allhwsupport, allhwsupport, int, 0444); MODULE_PARM_DESC(allhwsupport, "Enable support for all hardware (even it if overlaps with the mv_cesa driver)"); @@ -416,7 +419,7 @@ static int mv_cesa_probe(struct platform_device *pdev) return -ENOMEM; spin_lock_init(&cesa->lock); - crypto_init_queue(&cesa->queue, 50); + crypto_init_queue(&cesa->queue, CESA_CRYPTO_DEFAULT_MAX_QLEN); res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "regs"); cesa->regs = devm_ioremap_resource(dev, res); if (IS_ERR(cesa->regs)) -- cgit v1.2.3 From f62830886f6b8cb2f1c55e23cb5eab51c4b9ef2c Mon Sep 17 00:00:00 2001 From: Romain Perier Date: Tue, 21 Jun 2016 10:08:32 +0200 Subject: crypto: marvell - Check engine is not already running when enabling a req Add a BUG_ON() call when the driver tries to launch a crypto request while the engine is still processing the previous one. This replaces a silent system hang by a verbose kernel panic with the associated backtrace to let the user know that something went wrong in the CESA driver. Signed-off-by: Romain Perier Acked-by: Boris Brezillon Signed-off-by: Herbert Xu --- drivers/crypto/marvell/cipher.c | 2 ++ drivers/crypto/marvell/hash.c | 2 ++ drivers/crypto/marvell/tdma.c | 2 ++ 3 files changed, 6 insertions(+) (limited to 'drivers/crypto') diff --git a/drivers/crypto/marvell/cipher.c b/drivers/crypto/marvell/cipher.c index dcf1fceb9336..8c1432e44338 100644 --- a/drivers/crypto/marvell/cipher.c +++ b/drivers/crypto/marvell/cipher.c @@ -106,6 +106,8 @@ static void mv_cesa_ablkcipher_std_step(struct ablkcipher_request *req) mv_cesa_set_int_mask(engine, CESA_SA_INT_ACCEL0_DONE); writel_relaxed(CESA_SA_CFG_PARA_DIS, engine->regs + CESA_SA_CFG); + BUG_ON(readl(engine->regs + CESA_SA_CMD) & + CESA_SA_CMD_EN_CESA_SA_ACCL0); writel(CESA_SA_CMD_EN_CESA_SA_ACCL0, engine->regs + CESA_SA_CMD); } diff --git a/drivers/crypto/marvell/hash.c b/drivers/crypto/marvell/hash.c index 7a5058da9151..204c29358a7d 100644 --- a/drivers/crypto/marvell/hash.c +++ b/drivers/crypto/marvell/hash.c @@ -237,6 +237,8 @@ static void mv_cesa_ahash_std_step(struct ahash_request *req) mv_cesa_set_int_mask(engine, CESA_SA_INT_ACCEL0_DONE); writel_relaxed(CESA_SA_CFG_PARA_DIS, engine->regs + CESA_SA_CFG); + BUG_ON(readl(engine->regs + CESA_SA_CMD) & + CESA_SA_CMD_EN_CESA_SA_ACCL0); writel(CESA_SA_CMD_EN_CESA_SA_ACCL0, engine->regs + CESA_SA_CMD); } diff --git a/drivers/crypto/marvell/tdma.c b/drivers/crypto/marvell/tdma.c index 0ad8f1ecf175..e416a7455d28 100644 --- a/drivers/crypto/marvell/tdma.c +++ b/drivers/crypto/marvell/tdma.c @@ -53,6 +53,8 @@ void mv_cesa_dma_step(struct mv_cesa_tdma_req *dreq) engine->regs + CESA_SA_CFG); writel_relaxed(dreq->chain.first->cur_dma, engine->regs + CESA_TDMA_NEXT_ADDR); + BUG_ON(readl(engine->regs + CESA_SA_CMD) & + CESA_SA_CMD_EN_CESA_SA_ACCL0); writel(CESA_SA_CMD_EN_CESA_SA_ACCL0, engine->regs + CESA_SA_CMD); } -- cgit v1.2.3 From b99acf79a17bf402ea5759ad9d44eba3ed837a59 Mon Sep 17 00:00:00 2001 From: Romain Perier Date: Tue, 21 Jun 2016 10:08:33 +0200 Subject: crypto: marvell - Fix wrong type check in dma functions So far, the way that the type of a TDMA operation was checked was wrong. We have to use the type mask in order to get the right part of the flag containing the type of the operation. Signed-off-by: Romain Perier Acked-by: Boris Brezillon Signed-off-by: Herbert Xu --- drivers/crypto/marvell/tdma.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'drivers/crypto') diff --git a/drivers/crypto/marvell/tdma.c b/drivers/crypto/marvell/tdma.c index e416a7455d28..4d23bf95be9c 100644 --- a/drivers/crypto/marvell/tdma.c +++ b/drivers/crypto/marvell/tdma.c @@ -64,8 +64,9 @@ void mv_cesa_dma_cleanup(struct mv_cesa_tdma_req *dreq) for (tdma = dreq->chain.first; tdma;) { struct mv_cesa_tdma_desc *old_tdma = tdma; + u32 type = tdma->flags & CESA_TDMA_TYPE_MSK; - if (tdma->flags & CESA_TDMA_OP) + if (type == CESA_TDMA_OP) dma_pool_free(cesa_dev->dma->op_pool, tdma->op, le32_to_cpu(tdma->src)); @@ -90,7 +91,7 @@ void mv_cesa_dma_prepare(struct mv_cesa_tdma_req *dreq, if (tdma->flags & CESA_TDMA_SRC_IN_SRAM) tdma->src = cpu_to_le32(tdma->src + engine->sram_dma); - if (tdma->flags & CESA_TDMA_OP) + if ((tdma->flags & CESA_TDMA_TYPE_MSK) == CESA_TDMA_OP) mv_cesa_adjust_op(engine, tdma->op); } } -- cgit v1.2.3 From bac8e805a30dc2a29c3fdde9d822e59c75e710d7 Mon Sep 17 00:00:00 2001 From: Romain Perier Date: Tue, 21 Jun 2016 10:08:34 +0200 Subject: crypto: marvell - Copy IV vectors by DMA transfers for acipher requests Add a TDMA descriptor at the end of the request for copying the output IV vector via a DMA transfer. This is a good way for offloading as much as processing as possible to the DMA and the crypto engine. This is also required for processing multiple cipher requests in chained mode, otherwise the content of the IV vector would be overwritten by the last processed request. Signed-off-by: Romain Perier Acked-by: Boris Brezillon Signed-off-by: Herbert Xu --- drivers/crypto/marvell/cesa.c | 4 ++++ drivers/crypto/marvell/cesa.h | 5 +++++ drivers/crypto/marvell/cipher.c | 31 ++++++++++++++++++++++--------- drivers/crypto/marvell/tdma.c | 29 +++++++++++++++++++++++++++++ 4 files changed, 60 insertions(+), 9 deletions(-) (limited to 'drivers/crypto') diff --git a/drivers/crypto/marvell/cesa.c b/drivers/crypto/marvell/cesa.c index 47552c12e8c6..9063f56d8725 100644 --- a/drivers/crypto/marvell/cesa.c +++ b/drivers/crypto/marvell/cesa.c @@ -312,6 +312,10 @@ static int mv_cesa_dev_dma_init(struct mv_cesa_dev *cesa) if (!dma->padding_pool) return -ENOMEM; + dma->iv_pool = dmam_pool_create("cesa_iv", dev, 16, 1, 0); + if (!dma->iv_pool) + return -ENOMEM; + cesa->dma = dma; return 0; diff --git a/drivers/crypto/marvell/cesa.h b/drivers/crypto/marvell/cesa.h index 74071e45ada0..685a627bd9ae 100644 --- a/drivers/crypto/marvell/cesa.h +++ b/drivers/crypto/marvell/cesa.h @@ -275,6 +275,7 @@ struct mv_cesa_op_ctx { #define CESA_TDMA_DUMMY 0 #define CESA_TDMA_DATA 1 #define CESA_TDMA_OP 2 +#define CESA_TDMA_IV 3 /** * struct mv_cesa_tdma_desc - TDMA descriptor @@ -390,6 +391,7 @@ struct mv_cesa_dev_dma { struct dma_pool *op_pool; struct dma_pool *cache_pool; struct dma_pool *padding_pool; + struct dma_pool *iv_pool; }; /** @@ -790,6 +792,9 @@ mv_cesa_tdma_desc_iter_init(struct mv_cesa_tdma_chain *chain) memset(chain, 0, sizeof(*chain)); } +int mv_cesa_dma_add_iv_op(struct mv_cesa_tdma_chain *chain, dma_addr_t src, + u32 size, u32 flags, gfp_t gfp_flags); + struct mv_cesa_op_ctx *mv_cesa_dma_add_op(struct mv_cesa_tdma_chain *chain, const struct mv_cesa_op_ctx *op_templ, bool skip_ctx, diff --git a/drivers/crypto/marvell/cipher.c b/drivers/crypto/marvell/cipher.c index 8c1432e44338..90a2a5cf05f6 100644 --- a/drivers/crypto/marvell/cipher.c +++ b/drivers/crypto/marvell/cipher.c @@ -118,6 +118,7 @@ static int mv_cesa_ablkcipher_std_process(struct ablkcipher_request *req, struct mv_cesa_ablkcipher_std_req *sreq = &creq->req.std; struct mv_cesa_engine *engine = sreq->base.engine; size_t len; + unsigned int ivsize; len = sg_pcopy_from_buffer(req->dst, creq->dst_nents, engine->sram + CESA_SA_DATA_SRAM_OFFSET, @@ -127,6 +128,10 @@ static int mv_cesa_ablkcipher_std_process(struct ablkcipher_request *req, if (sreq->offset < req->nbytes) return -EINPROGRESS; + ivsize = crypto_ablkcipher_ivsize(crypto_ablkcipher_reqtfm(req)); + memcpy_fromio(req->info, + engine->sram + CESA_SA_CRYPT_IV_SRAM_OFFSET, ivsize); + return 0; } @@ -135,21 +140,20 @@ static int mv_cesa_ablkcipher_process(struct crypto_async_request *req, { struct ablkcipher_request *ablkreq = ablkcipher_request_cast(req); struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(ablkreq); - struct mv_cesa_ablkcipher_std_req *sreq = &creq->req.std; - struct mv_cesa_engine *engine = sreq->base.engine; + struct mv_cesa_tdma_req *dreq; + unsigned int ivsize; int ret; - if (creq->req.base.type == CESA_DMA_REQ) - ret = mv_cesa_dma_process(&creq->req.dma, status); - else - ret = mv_cesa_ablkcipher_std_process(ablkreq, status); + if (creq->req.base.type == CESA_STD_REQ) + return mv_cesa_ablkcipher_std_process(ablkreq, status); + ret = mv_cesa_dma_process(&creq->req.dma, status); if (ret) return ret; - memcpy_fromio(ablkreq->info, - engine->sram + CESA_SA_CRYPT_IV_SRAM_OFFSET, - crypto_ablkcipher_ivsize(crypto_ablkcipher_reqtfm(ablkreq))); + dreq = &creq->req.dma; + ivsize = crypto_ablkcipher_ivsize(crypto_ablkcipher_reqtfm(ablkreq)); + memcpy_fromio(ablkreq->info, dreq->chain.last->data, ivsize); return 0; } @@ -302,6 +306,7 @@ static int mv_cesa_ablkcipher_dma_req_init(struct ablkcipher_request *req, struct mv_cesa_tdma_chain chain; bool skip_ctx = false; int ret; + unsigned int ivsize; dreq->base.type = CESA_DMA_REQ; dreq->chain.first = NULL; @@ -360,6 +365,14 @@ static int mv_cesa_ablkcipher_dma_req_init(struct ablkcipher_request *req, } while (mv_cesa_ablkcipher_req_iter_next_op(&iter)); + /* Add output data for IV */ + ivsize = crypto_ablkcipher_ivsize(crypto_ablkcipher_reqtfm(req)); + ret = mv_cesa_dma_add_iv_op(&chain, CESA_SA_CRYPT_IV_SRAM_OFFSET, + ivsize, CESA_TDMA_SRC_IN_SRAM, flags); + + if (ret) + goto err_free_tdma; + dreq->chain = chain; return 0; diff --git a/drivers/crypto/marvell/tdma.c b/drivers/crypto/marvell/tdma.c index 4d23bf95be9c..dc0733538606 100644 --- a/drivers/crypto/marvell/tdma.c +++ b/drivers/crypto/marvell/tdma.c @@ -69,6 +69,9 @@ void mv_cesa_dma_cleanup(struct mv_cesa_tdma_req *dreq) if (type == CESA_TDMA_OP) dma_pool_free(cesa_dev->dma->op_pool, tdma->op, le32_to_cpu(tdma->src)); + else if (type == CESA_TDMA_IV) + dma_pool_free(cesa_dev->dma->iv_pool, tdma->data, + le32_to_cpu(tdma->dst)); tdma = tdma->next; dma_pool_free(cesa_dev->dma->tdma_desc_pool, old_tdma, @@ -120,6 +123,32 @@ mv_cesa_dma_add_desc(struct mv_cesa_tdma_chain *chain, gfp_t flags) return new_tdma; } +int mv_cesa_dma_add_iv_op(struct mv_cesa_tdma_chain *chain, dma_addr_t src, + u32 size, u32 flags, gfp_t gfp_flags) +{ + + struct mv_cesa_tdma_desc *tdma; + u8 *iv; + dma_addr_t dma_handle; + + tdma = mv_cesa_dma_add_desc(chain, gfp_flags); + if (IS_ERR(tdma)) + return PTR_ERR(tdma); + + iv = dma_pool_alloc(cesa_dev->dma->iv_pool, flags, &dma_handle); + if (!iv) + return -ENOMEM; + + tdma->byte_cnt = cpu_to_le32(size | BIT(31)); + tdma->src = src; + tdma->dst = cpu_to_le32(dma_handle); + tdma->data = iv; + + flags &= (CESA_TDMA_DST_IN_SRAM | CESA_TDMA_SRC_IN_SRAM); + tdma->flags = flags | CESA_TDMA_IV; + return 0; +} + struct mv_cesa_op_ctx *mv_cesa_dma_add_op(struct mv_cesa_tdma_chain *chain, const struct mv_cesa_op_ctx *op_templ, bool skip_ctx, -- cgit v1.2.3 From 53da740fed302fbecd66d21cad1c4115b9aaab78 Mon Sep 17 00:00:00 2001 From: Romain Perier Date: Tue, 21 Jun 2016 10:08:35 +0200 Subject: crypto: marvell - Move tdma chain out of mv_cesa_tdma_req and remove it Currently, the only way to access the tdma chain is to use the 'req' union from a mv_cesa_{ablkcipher,ahash}. This will soon become a problem if we want to handle the TDMA chaining vs standard/non-DMA processing in a generic way (with generic functions at the cesa.c level detecting whether the request should be queued at the DMA level or not). Hence the decision to move the chain field a the mv_cesa_req level at the expense of adding 2 void * fields to all request contexts (including non-DMA ones) and to remove the type completly. To limit the overhead, we get rid of the type field, which can now be deduced from the req->chain.first value. Once these changes are done the union is no longer needed, so remove it and move mv_cesa_ablkcipher_std_req and mv_cesa_req to mv_cesa_ablkcipher_req directly. There are also no needs to keep the 'base' field into the union of mv_cesa_ahash_req, so move it into the upper structure. Signed-off-by: Romain Perier Acked-by: Boris Brezillon Signed-off-by: Herbert Xu --- drivers/crypto/marvell/cesa.c | 3 +- drivers/crypto/marvell/cesa.h | 44 +++++++++++----------------- drivers/crypto/marvell/cipher.c | 63 +++++++++++++++++++++------------------- drivers/crypto/marvell/hash.c | 64 +++++++++++++++++++---------------------- drivers/crypto/marvell/tdma.c | 8 +++--- 5 files changed, 84 insertions(+), 98 deletions(-) (limited to 'drivers/crypto') diff --git a/drivers/crypto/marvell/cesa.c b/drivers/crypto/marvell/cesa.c index 9063f56d8725..f407bacdb021 100644 --- a/drivers/crypto/marvell/cesa.c +++ b/drivers/crypto/marvell/cesa.c @@ -111,7 +111,8 @@ static irqreturn_t mv_cesa_int(int irq, void *priv) return ret; } -int mv_cesa_queue_req(struct crypto_async_request *req) +int mv_cesa_queue_req(struct crypto_async_request *req, + struct mv_cesa_req *creq) { int ret; int i; diff --git a/drivers/crypto/marvell/cesa.h b/drivers/crypto/marvell/cesa.h index 685a627bd9ae..e67e3f17eb01 100644 --- a/drivers/crypto/marvell/cesa.h +++ b/drivers/crypto/marvell/cesa.h @@ -509,21 +509,11 @@ enum mv_cesa_req_type { /** * struct mv_cesa_req - CESA request - * @type: request type * @engine: engine associated with this request + * @chain: list of tdma descriptors associated with this request */ struct mv_cesa_req { - enum mv_cesa_req_type type; struct mv_cesa_engine *engine; -}; - -/** - * struct mv_cesa_tdma_req - CESA TDMA request - * @base: base information - * @chain: TDMA chain - */ -struct mv_cesa_tdma_req { - struct mv_cesa_req base; struct mv_cesa_tdma_chain chain; }; @@ -540,13 +530,11 @@ struct mv_cesa_sg_std_iter { /** * struct mv_cesa_ablkcipher_std_req - cipher standard request - * @base: base information * @op: operation context * @offset: current operation offset * @size: size of the crypto operation */ struct mv_cesa_ablkcipher_std_req { - struct mv_cesa_req base; struct mv_cesa_op_ctx op; unsigned int offset; unsigned int size; @@ -560,34 +548,27 @@ struct mv_cesa_ablkcipher_std_req { * @dst_nents: number of entries in the dest sg list */ struct mv_cesa_ablkcipher_req { - union { - struct mv_cesa_req base; - struct mv_cesa_tdma_req dma; - struct mv_cesa_ablkcipher_std_req std; - } req; + struct mv_cesa_req base; + struct mv_cesa_ablkcipher_std_req std; int src_nents; int dst_nents; }; /** * struct mv_cesa_ahash_std_req - standard hash request - * @base: base information * @offset: current operation offset */ struct mv_cesa_ahash_std_req { - struct mv_cesa_req base; unsigned int offset; }; /** * struct mv_cesa_ahash_dma_req - DMA hash request - * @base: base information * @padding: padding buffer * @padding_dma: DMA address of the padding buffer * @cache_dma: DMA address of the cache buffer */ struct mv_cesa_ahash_dma_req { - struct mv_cesa_tdma_req base; u8 *padding; dma_addr_t padding_dma; u8 *cache; @@ -606,8 +587,8 @@ struct mv_cesa_ahash_dma_req { * @state: hash state */ struct mv_cesa_ahash_req { + struct mv_cesa_req base; union { - struct mv_cesa_req base; struct mv_cesa_ahash_dma_req dma; struct mv_cesa_ahash_std_req std; } req; @@ -625,6 +606,12 @@ struct mv_cesa_ahash_req { extern struct mv_cesa_dev *cesa_dev; +static inline enum mv_cesa_req_type +mv_cesa_req_get_type(struct mv_cesa_req *req) +{ + return req->chain.first ? CESA_DMA_REQ : CESA_STD_REQ; +} + static inline void mv_cesa_update_op_cfg(struct mv_cesa_op_ctx *op, u32 cfg, u32 mask) { @@ -697,7 +684,8 @@ static inline bool mv_cesa_mac_op_is_first_frag(const struct mv_cesa_op_ctx *op) CESA_SA_DESC_CFG_FIRST_FRAG; } -int mv_cesa_queue_req(struct crypto_async_request *req); +int mv_cesa_queue_req(struct crypto_async_request *req, + struct mv_cesa_req *creq); /* * Helper function that indicates whether a crypto request needs to be @@ -767,9 +755,9 @@ static inline bool mv_cesa_req_dma_iter_next_op(struct mv_cesa_dma_iter *iter) return iter->op_len; } -void mv_cesa_dma_step(struct mv_cesa_tdma_req *dreq); +void mv_cesa_dma_step(struct mv_cesa_req *dreq); -static inline int mv_cesa_dma_process(struct mv_cesa_tdma_req *dreq, +static inline int mv_cesa_dma_process(struct mv_cesa_req *dreq, u32 status) { if (!(status & CESA_SA_INT_ACC0_IDMA_DONE)) @@ -781,10 +769,10 @@ static inline int mv_cesa_dma_process(struct mv_cesa_tdma_req *dreq, return 0; } -void mv_cesa_dma_prepare(struct mv_cesa_tdma_req *dreq, +void mv_cesa_dma_prepare(struct mv_cesa_req *dreq, struct mv_cesa_engine *engine); +void mv_cesa_dma_cleanup(struct mv_cesa_req *dreq); -void mv_cesa_dma_cleanup(struct mv_cesa_tdma_req *dreq); static inline void mv_cesa_tdma_desc_iter_init(struct mv_cesa_tdma_chain *chain) diff --git a/drivers/crypto/marvell/cipher.c b/drivers/crypto/marvell/cipher.c index 90a2a5cf05f6..7699528957f4 100644 --- a/drivers/crypto/marvell/cipher.c +++ b/drivers/crypto/marvell/cipher.c @@ -70,22 +70,22 @@ mv_cesa_ablkcipher_dma_cleanup(struct ablkcipher_request *req) dma_unmap_sg(cesa_dev->dev, req->src, creq->src_nents, DMA_BIDIRECTIONAL); } - mv_cesa_dma_cleanup(&creq->req.dma); + mv_cesa_dma_cleanup(&creq->base); } static inline void mv_cesa_ablkcipher_cleanup(struct ablkcipher_request *req) { struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(req); - if (creq->req.base.type == CESA_DMA_REQ) + if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ) mv_cesa_ablkcipher_dma_cleanup(req); } static void mv_cesa_ablkcipher_std_step(struct ablkcipher_request *req) { struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(req); - struct mv_cesa_ablkcipher_std_req *sreq = &creq->req.std; - struct mv_cesa_engine *engine = sreq->base.engine; + struct mv_cesa_ablkcipher_std_req *sreq = &creq->std; + struct mv_cesa_engine *engine = creq->base.engine; size_t len = min_t(size_t, req->nbytes - sreq->offset, CESA_SA_SRAM_PAYLOAD_SIZE); @@ -115,8 +115,8 @@ static int mv_cesa_ablkcipher_std_process(struct ablkcipher_request *req, u32 status) { struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(req); - struct mv_cesa_ablkcipher_std_req *sreq = &creq->req.std; - struct mv_cesa_engine *engine = sreq->base.engine; + struct mv_cesa_ablkcipher_std_req *sreq = &creq->std; + struct mv_cesa_engine *engine = creq->base.engine; size_t len; unsigned int ivsize; @@ -140,20 +140,19 @@ static int mv_cesa_ablkcipher_process(struct crypto_async_request *req, { struct ablkcipher_request *ablkreq = ablkcipher_request_cast(req); struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(ablkreq); - struct mv_cesa_tdma_req *dreq; + struct mv_cesa_req *basereq = &creq->base; unsigned int ivsize; int ret; - if (creq->req.base.type == CESA_STD_REQ) + if (mv_cesa_req_get_type(basereq) == CESA_STD_REQ) return mv_cesa_ablkcipher_std_process(ablkreq, status); - ret = mv_cesa_dma_process(&creq->req.dma, status); + ret = mv_cesa_dma_process(basereq, status); if (ret) return ret; - dreq = &creq->req.dma; ivsize = crypto_ablkcipher_ivsize(crypto_ablkcipher_reqtfm(ablkreq)); - memcpy_fromio(ablkreq->info, dreq->chain.last->data, ivsize); + memcpy_fromio(ablkreq->info, basereq->chain.last->data, ivsize); return 0; } @@ -163,8 +162,8 @@ static void mv_cesa_ablkcipher_step(struct crypto_async_request *req) struct ablkcipher_request *ablkreq = ablkcipher_request_cast(req); struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(ablkreq); - if (creq->req.base.type == CESA_DMA_REQ) - mv_cesa_dma_step(&creq->req.dma); + if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ) + mv_cesa_dma_step(&creq->base); else mv_cesa_ablkcipher_std_step(ablkreq); } @@ -173,17 +172,17 @@ static inline void mv_cesa_ablkcipher_dma_prepare(struct ablkcipher_request *req) { struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(req); - struct mv_cesa_tdma_req *dreq = &creq->req.dma; + struct mv_cesa_req *basereq = &creq->base; - mv_cesa_dma_prepare(dreq, dreq->base.engine); + mv_cesa_dma_prepare(basereq, basereq->engine); } static inline void mv_cesa_ablkcipher_std_prepare(struct ablkcipher_request *req) { struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(req); - struct mv_cesa_ablkcipher_std_req *sreq = &creq->req.std; - struct mv_cesa_engine *engine = sreq->base.engine; + struct mv_cesa_ablkcipher_std_req *sreq = &creq->std; + struct mv_cesa_engine *engine = creq->base.engine; sreq->size = 0; sreq->offset = 0; @@ -196,9 +195,9 @@ static inline void mv_cesa_ablkcipher_prepare(struct crypto_async_request *req, { struct ablkcipher_request *ablkreq = ablkcipher_request_cast(req); struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(ablkreq); - creq->req.base.engine = engine; + creq->base.engine = engine; - if (creq->req.base.type == CESA_DMA_REQ) + if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ) mv_cesa_ablkcipher_dma_prepare(ablkreq); else mv_cesa_ablkcipher_std_prepare(ablkreq); @@ -301,16 +300,15 @@ static int mv_cesa_ablkcipher_dma_req_init(struct ablkcipher_request *req, struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(req); gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? GFP_KERNEL : GFP_ATOMIC; - struct mv_cesa_tdma_req *dreq = &creq->req.dma; + struct mv_cesa_req *basereq = &creq->base; struct mv_cesa_ablkcipher_dma_iter iter; struct mv_cesa_tdma_chain chain; bool skip_ctx = false; int ret; unsigned int ivsize; - dreq->base.type = CESA_DMA_REQ; - dreq->chain.first = NULL; - dreq->chain.last = NULL; + basereq->chain.first = NULL; + basereq->chain.last = NULL; if (req->src != req->dst) { ret = dma_map_sg(cesa_dev->dev, req->src, creq->src_nents, @@ -373,12 +371,12 @@ static int mv_cesa_ablkcipher_dma_req_init(struct ablkcipher_request *req, if (ret) goto err_free_tdma; - dreq->chain = chain; + basereq->chain = chain; return 0; err_free_tdma: - mv_cesa_dma_cleanup(dreq); + mv_cesa_dma_cleanup(basereq); if (req->dst != req->src) dma_unmap_sg(cesa_dev->dev, req->dst, creq->dst_nents, DMA_FROM_DEVICE); @@ -395,11 +393,13 @@ mv_cesa_ablkcipher_std_req_init(struct ablkcipher_request *req, const struct mv_cesa_op_ctx *op_templ) { struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(req); - struct mv_cesa_ablkcipher_std_req *sreq = &creq->req.std; + struct mv_cesa_ablkcipher_std_req *sreq = &creq->std; + struct mv_cesa_req *basereq = &creq->base; - sreq->base.type = CESA_STD_REQ; sreq->op = *op_templ; sreq->skip_ctx = false; + basereq->chain.first = NULL; + basereq->chain.last = NULL; return 0; } @@ -441,6 +441,7 @@ static int mv_cesa_ablkcipher_req_init(struct ablkcipher_request *req, static int mv_cesa_des_op(struct ablkcipher_request *req, struct mv_cesa_op_ctx *tmpl) { + struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(req); struct mv_cesa_des_ctx *ctx = crypto_tfm_ctx(req->base.tfm); int ret; @@ -453,7 +454,7 @@ static int mv_cesa_des_op(struct ablkcipher_request *req, if (ret) return ret; - ret = mv_cesa_queue_req(&req->base); + ret = mv_cesa_queue_req(&req->base, &creq->base); if (mv_cesa_req_needs_cleanup(&req->base, ret)) mv_cesa_ablkcipher_cleanup(req); @@ -561,6 +562,7 @@ struct crypto_alg mv_cesa_cbc_des_alg = { static int mv_cesa_des3_op(struct ablkcipher_request *req, struct mv_cesa_op_ctx *tmpl) { + struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(req); struct mv_cesa_des3_ctx *ctx = crypto_tfm_ctx(req->base.tfm); int ret; @@ -573,7 +575,7 @@ static int mv_cesa_des3_op(struct ablkcipher_request *req, if (ret) return ret; - ret = mv_cesa_queue_req(&req->base); + ret = mv_cesa_queue_req(&req->base, &creq->base); if (mv_cesa_req_needs_cleanup(&req->base, ret)) mv_cesa_ablkcipher_cleanup(req); @@ -687,6 +689,7 @@ struct crypto_alg mv_cesa_cbc_des3_ede_alg = { static int mv_cesa_aes_op(struct ablkcipher_request *req, struct mv_cesa_op_ctx *tmpl) { + struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(req); struct mv_cesa_aes_ctx *ctx = crypto_tfm_ctx(req->base.tfm); int ret, i; u32 *key; @@ -715,7 +718,7 @@ static int mv_cesa_aes_op(struct ablkcipher_request *req, if (ret) return ret; - ret = mv_cesa_queue_req(&req->base); + ret = mv_cesa_queue_req(&req->base, &creq->base); if (mv_cesa_req_needs_cleanup(&req->base, ret)) mv_cesa_ablkcipher_cleanup(req); diff --git a/drivers/crypto/marvell/hash.c b/drivers/crypto/marvell/hash.c index 204c29358a7d..03aaa39d6fd6 100644 --- a/drivers/crypto/marvell/hash.c +++ b/drivers/crypto/marvell/hash.c @@ -103,14 +103,14 @@ static inline void mv_cesa_ahash_dma_cleanup(struct ahash_request *req) dma_unmap_sg(cesa_dev->dev, req->src, creq->src_nents, DMA_TO_DEVICE); mv_cesa_ahash_dma_free_cache(&creq->req.dma); - mv_cesa_dma_cleanup(&creq->req.dma.base); + mv_cesa_dma_cleanup(&creq->base); } static inline void mv_cesa_ahash_cleanup(struct ahash_request *req) { struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); - if (creq->req.base.type == CESA_DMA_REQ) + if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ) mv_cesa_ahash_dma_cleanup(req); } @@ -118,7 +118,7 @@ static void mv_cesa_ahash_last_cleanup(struct ahash_request *req) { struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); - if (creq->req.base.type == CESA_DMA_REQ) + if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ) mv_cesa_ahash_dma_last_cleanup(req); } @@ -157,7 +157,7 @@ static void mv_cesa_ahash_std_step(struct ahash_request *req) { struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); struct mv_cesa_ahash_std_req *sreq = &creq->req.std; - struct mv_cesa_engine *engine = sreq->base.engine; + struct mv_cesa_engine *engine = creq->base.engine; struct mv_cesa_op_ctx *op; unsigned int new_cache_ptr = 0; u32 frag_mode; @@ -256,16 +256,16 @@ static int mv_cesa_ahash_std_process(struct ahash_request *req, u32 status) static inline void mv_cesa_ahash_dma_prepare(struct ahash_request *req) { struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); - struct mv_cesa_tdma_req *dreq = &creq->req.dma.base; + struct mv_cesa_req *basereq = &creq->base; - mv_cesa_dma_prepare(dreq, dreq->base.engine); + mv_cesa_dma_prepare(basereq, basereq->engine); } static void mv_cesa_ahash_std_prepare(struct ahash_request *req) { struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); struct mv_cesa_ahash_std_req *sreq = &creq->req.std; - struct mv_cesa_engine *engine = sreq->base.engine; + struct mv_cesa_engine *engine = creq->base.engine; sreq->offset = 0; mv_cesa_adjust_op(engine, &creq->op_tmpl); @@ -277,8 +277,8 @@ static void mv_cesa_ahash_step(struct crypto_async_request *req) struct ahash_request *ahashreq = ahash_request_cast(req); struct mv_cesa_ahash_req *creq = ahash_request_ctx(ahashreq); - if (creq->req.base.type == CESA_DMA_REQ) - mv_cesa_dma_step(&creq->req.dma.base); + if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ) + mv_cesa_dma_step(&creq->base); else mv_cesa_ahash_std_step(ahashreq); } @@ -287,12 +287,12 @@ static int mv_cesa_ahash_process(struct crypto_async_request *req, u32 status) { struct ahash_request *ahashreq = ahash_request_cast(req); struct mv_cesa_ahash_req *creq = ahash_request_ctx(ahashreq); - struct mv_cesa_engine *engine = creq->req.base.engine; + struct mv_cesa_engine *engine = creq->base.engine; unsigned int digsize; int ret, i; - if (creq->req.base.type == CESA_DMA_REQ) - ret = mv_cesa_dma_process(&creq->req.dma.base, status); + if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ) + ret = mv_cesa_dma_process(&creq->base, status); else ret = mv_cesa_ahash_std_process(ahashreq, status); @@ -338,9 +338,9 @@ static void mv_cesa_ahash_prepare(struct crypto_async_request *req, unsigned int digsize; int i; - creq->req.base.engine = engine; + creq->base.engine = engine; - if (creq->req.base.type == CESA_DMA_REQ) + if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ) mv_cesa_ahash_dma_prepare(ahashreq); else mv_cesa_ahash_std_prepare(ahashreq); @@ -555,15 +555,14 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req) struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); gfp_t flags = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? GFP_KERNEL : GFP_ATOMIC; - struct mv_cesa_ahash_dma_req *ahashdreq = &creq->req.dma; - struct mv_cesa_tdma_req *dreq = &ahashdreq->base; + struct mv_cesa_req *basereq = &creq->base; struct mv_cesa_ahash_dma_iter iter; struct mv_cesa_op_ctx *op = NULL; unsigned int frag_len; int ret; - dreq->chain.first = NULL; - dreq->chain.last = NULL; + basereq->chain.first = NULL; + basereq->chain.last = NULL; if (creq->src_nents) { ret = dma_map_sg(cesa_dev->dev, req->src, creq->src_nents, @@ -574,14 +573,14 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req) } } - mv_cesa_tdma_desc_iter_init(&dreq->chain); + mv_cesa_tdma_desc_iter_init(&basereq->chain); mv_cesa_ahash_req_iter_init(&iter, req); /* * Add the cache (left-over data from a previous block) first. * This will never overflow the SRAM size. */ - ret = mv_cesa_ahash_dma_add_cache(&dreq->chain, &iter, creq, flags); + ret = mv_cesa_ahash_dma_add_cache(&basereq->chain, &iter, creq, flags); if (ret) goto err_free_tdma; @@ -592,7 +591,7 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req) * data. We intentionally do not add the final op block. */ while (true) { - ret = mv_cesa_dma_add_op_transfers(&dreq->chain, + ret = mv_cesa_dma_add_op_transfers(&basereq->chain, &iter.base, &iter.src, flags); if (ret) @@ -603,7 +602,7 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req) if (!mv_cesa_ahash_req_iter_next_op(&iter)) break; - op = mv_cesa_dma_add_frag(&dreq->chain, &creq->op_tmpl, + op = mv_cesa_dma_add_frag(&basereq->chain, &creq->op_tmpl, frag_len, flags); if (IS_ERR(op)) { ret = PTR_ERR(op); @@ -621,10 +620,10 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req) * operation, which depends whether this is the final request. */ if (creq->last_req) - op = mv_cesa_ahash_dma_last_req(&dreq->chain, &iter, creq, + op = mv_cesa_ahash_dma_last_req(&basereq->chain, &iter, creq, frag_len, flags); else if (frag_len) - op = mv_cesa_dma_add_frag(&dreq->chain, &creq->op_tmpl, + op = mv_cesa_dma_add_frag(&basereq->chain, &creq->op_tmpl, frag_len, flags); if (IS_ERR(op)) { @@ -634,7 +633,7 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req) if (op) { /* Add dummy desc to wait for crypto operation end */ - ret = mv_cesa_dma_add_dummy_end(&dreq->chain, flags); + ret = mv_cesa_dma_add_dummy_end(&basereq->chain, flags); if (ret) goto err_free_tdma; } @@ -648,7 +647,7 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req) return 0; err_free_tdma: - mv_cesa_dma_cleanup(dreq); + mv_cesa_dma_cleanup(basereq); dma_unmap_sg(cesa_dev->dev, req->src, creq->src_nents, DMA_TO_DEVICE); err: @@ -662,11 +661,6 @@ static int mv_cesa_ahash_req_init(struct ahash_request *req, bool *cached) struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); int ret; - if (cesa_dev->caps->has_tdma) - creq->req.base.type = CESA_DMA_REQ; - else - creq->req.base.type = CESA_STD_REQ; - creq->src_nents = sg_nents_for_len(req->src, req->nbytes); if (creq->src_nents < 0) { dev_err(cesa_dev->dev, "Invalid number of src SG"); @@ -680,7 +674,7 @@ static int mv_cesa_ahash_req_init(struct ahash_request *req, bool *cached) if (*cached) return 0; - if (creq->req.base.type == CESA_DMA_REQ) + if (cesa_dev->caps->has_tdma) ret = mv_cesa_ahash_dma_req_init(req); return ret; @@ -700,7 +694,7 @@ static int mv_cesa_ahash_update(struct ahash_request *req) if (cached) return 0; - ret = mv_cesa_queue_req(&req->base); + ret = mv_cesa_queue_req(&req->base, &creq->base); if (mv_cesa_req_needs_cleanup(&req->base, ret)) mv_cesa_ahash_cleanup(req); @@ -725,7 +719,7 @@ static int mv_cesa_ahash_final(struct ahash_request *req) if (cached) return 0; - ret = mv_cesa_queue_req(&req->base); + ret = mv_cesa_queue_req(&req->base, &creq->base); if (mv_cesa_req_needs_cleanup(&req->base, ret)) mv_cesa_ahash_cleanup(req); @@ -750,7 +744,7 @@ static int mv_cesa_ahash_finup(struct ahash_request *req) if (cached) return 0; - ret = mv_cesa_queue_req(&req->base); + ret = mv_cesa_queue_req(&req->base, &creq->base); if (mv_cesa_req_needs_cleanup(&req->base, ret)) mv_cesa_ahash_cleanup(req); diff --git a/drivers/crypto/marvell/tdma.c b/drivers/crypto/marvell/tdma.c index dc0733538606..f9b8d2cfa1bc 100644 --- a/drivers/crypto/marvell/tdma.c +++ b/drivers/crypto/marvell/tdma.c @@ -37,9 +37,9 @@ bool mv_cesa_req_dma_iter_next_transfer(struct mv_cesa_dma_iter *iter, return true; } -void mv_cesa_dma_step(struct mv_cesa_tdma_req *dreq) +void mv_cesa_dma_step(struct mv_cesa_req *dreq) { - struct mv_cesa_engine *engine = dreq->base.engine; + struct mv_cesa_engine *engine = dreq->engine; writel_relaxed(0, engine->regs + CESA_SA_CFG); @@ -58,7 +58,7 @@ void mv_cesa_dma_step(struct mv_cesa_tdma_req *dreq) writel(CESA_SA_CMD_EN_CESA_SA_ACCL0, engine->regs + CESA_SA_CMD); } -void mv_cesa_dma_cleanup(struct mv_cesa_tdma_req *dreq) +void mv_cesa_dma_cleanup(struct mv_cesa_req *dreq) { struct mv_cesa_tdma_desc *tdma; @@ -82,7 +82,7 @@ void mv_cesa_dma_cleanup(struct mv_cesa_tdma_req *dreq) dreq->chain.last = NULL; } -void mv_cesa_dma_prepare(struct mv_cesa_tdma_req *dreq, +void mv_cesa_dma_prepare(struct mv_cesa_req *dreq, struct mv_cesa_engine *engine) { struct mv_cesa_tdma_desc *tdma; -- cgit v1.2.3 From 1bf6682cb31dff64c11564fe0c12ddd2cda23626 Mon Sep 17 00:00:00 2001 From: Romain Perier Date: Tue, 21 Jun 2016 10:08:36 +0200 Subject: crypto: marvell - Add a complete operation for async requests So far, the 'process' operation was used to check if the current request was correctly handled by the engine, if it was the case it copied information from the SRAM to the main memory. Now, we split this operation. We keep the 'process' operation, which still checks if the request was correctly handled by the engine or not, then we add a new operation for completion. The 'complete' method copies the content of the SRAM to memory. This will soon become useful if we want to call the process and the complete operations from different locations depending on the type of the request (different cleanup logic). Signed-off-by: Romain Perier Acked-by: Boris Brezillon Signed-off-by: Herbert Xu --- drivers/crypto/marvell/cesa.c | 1 + drivers/crypto/marvell/cesa.h | 3 +++ drivers/crypto/marvell/cipher.c | 28 +++++++++++++++++++++++----- drivers/crypto/marvell/hash.c | 22 ++++++++++++---------- 4 files changed, 39 insertions(+), 15 deletions(-) (limited to 'drivers/crypto') diff --git a/drivers/crypto/marvell/cesa.c b/drivers/crypto/marvell/cesa.c index f407bacdb021..40d4add19947 100644 --- a/drivers/crypto/marvell/cesa.c +++ b/drivers/crypto/marvell/cesa.c @@ -98,6 +98,7 @@ static irqreturn_t mv_cesa_int(int irq, void *priv) engine->req = NULL; mv_cesa_dequeue_req_unlocked(engine); spin_unlock_bh(&engine->lock); + ctx->ops->complete(req); ctx->ops->cleanup(req); local_bh_disable(); req->complete(req, res); diff --git a/drivers/crypto/marvell/cesa.h b/drivers/crypto/marvell/cesa.h index e67e3f17eb01..d7493351f14b 100644 --- a/drivers/crypto/marvell/cesa.h +++ b/drivers/crypto/marvell/cesa.h @@ -456,6 +456,8 @@ struct mv_cesa_engine { * code) * @step: launch the crypto operation on the next chunk * @cleanup: cleanup the crypto request (release associated data) + * @complete: complete the request, i.e copy result or context from sram when + * needed. */ struct mv_cesa_req_ops { void (*prepare)(struct crypto_async_request *req, @@ -463,6 +465,7 @@ struct mv_cesa_req_ops { int (*process)(struct crypto_async_request *req, u32 status); void (*step)(struct crypto_async_request *req); void (*cleanup)(struct crypto_async_request *req); + void (*complete)(struct crypto_async_request *req); }; /** diff --git a/drivers/crypto/marvell/cipher.c b/drivers/crypto/marvell/cipher.c index 7699528957f4..f994469feeb9 100644 --- a/drivers/crypto/marvell/cipher.c +++ b/drivers/crypto/marvell/cipher.c @@ -118,7 +118,6 @@ static int mv_cesa_ablkcipher_std_process(struct ablkcipher_request *req, struct mv_cesa_ablkcipher_std_req *sreq = &creq->std; struct mv_cesa_engine *engine = creq->base.engine; size_t len; - unsigned int ivsize; len = sg_pcopy_from_buffer(req->dst, creq->dst_nents, engine->sram + CESA_SA_DATA_SRAM_OFFSET, @@ -128,10 +127,6 @@ static int mv_cesa_ablkcipher_std_process(struct ablkcipher_request *req, if (sreq->offset < req->nbytes) return -EINPROGRESS; - ivsize = crypto_ablkcipher_ivsize(crypto_ablkcipher_reqtfm(req)); - memcpy_fromio(req->info, - engine->sram + CESA_SA_CRYPT_IV_SRAM_OFFSET, ivsize); - return 0; } @@ -211,11 +206,34 @@ mv_cesa_ablkcipher_req_cleanup(struct crypto_async_request *req) mv_cesa_ablkcipher_cleanup(ablkreq); } +static void +mv_cesa_ablkcipher_complete(struct crypto_async_request *req) +{ + struct ablkcipher_request *ablkreq = ablkcipher_request_cast(req); + struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(ablkreq); + struct mv_cesa_engine *engine = creq->base.engine; + unsigned int ivsize; + + ivsize = crypto_ablkcipher_ivsize(crypto_ablkcipher_reqtfm(ablkreq)); + + if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ) { + struct mv_cesa_req *basereq; + + basereq = &creq->base; + memcpy(ablkreq->info, basereq->chain.last->data, ivsize); + } else { + memcpy_fromio(ablkreq->info, + engine->sram + CESA_SA_CRYPT_IV_SRAM_OFFSET, + ivsize); + } +} + static const struct mv_cesa_req_ops mv_cesa_ablkcipher_req_ops = { .step = mv_cesa_ablkcipher_step, .process = mv_cesa_ablkcipher_process, .prepare = mv_cesa_ablkcipher_prepare, .cleanup = mv_cesa_ablkcipher_req_cleanup, + .complete = mv_cesa_ablkcipher_complete, }; static int mv_cesa_ablkcipher_cra_init(struct crypto_tfm *tfm) diff --git a/drivers/crypto/marvell/hash.c b/drivers/crypto/marvell/hash.c index 03aaa39d6fd6..faa677a3e881 100644 --- a/drivers/crypto/marvell/hash.c +++ b/drivers/crypto/marvell/hash.c @@ -287,17 +287,20 @@ static int mv_cesa_ahash_process(struct crypto_async_request *req, u32 status) { struct ahash_request *ahashreq = ahash_request_cast(req); struct mv_cesa_ahash_req *creq = ahash_request_ctx(ahashreq); - struct mv_cesa_engine *engine = creq->base.engine; - unsigned int digsize; - int ret, i; if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ) - ret = mv_cesa_dma_process(&creq->base, status); - else - ret = mv_cesa_ahash_std_process(ahashreq, status); + return mv_cesa_dma_process(&creq->base, status); - if (ret == -EINPROGRESS) - return ret; + return mv_cesa_ahash_std_process(ahashreq, status); +} + +static void mv_cesa_ahash_complete(struct crypto_async_request *req) +{ + struct ahash_request *ahashreq = ahash_request_cast(req); + struct mv_cesa_ahash_req *creq = ahash_request_ctx(ahashreq); + struct mv_cesa_engine *engine = creq->base.engine; + unsigned int digsize; + int i; digsize = crypto_ahash_digestsize(crypto_ahash_reqtfm(ahashreq)); for (i = 0; i < digsize / 4; i++) @@ -326,8 +329,6 @@ static int mv_cesa_ahash_process(struct crypto_async_request *req, u32 status) result[i] = cpu_to_be32(creq->state[i]); } } - - return ret; } static void mv_cesa_ahash_prepare(struct crypto_async_request *req, @@ -366,6 +367,7 @@ static const struct mv_cesa_req_ops mv_cesa_ahash_req_ops = { .process = mv_cesa_ahash_process, .prepare = mv_cesa_ahash_prepare, .cleanup = mv_cesa_ahash_req_cleanup, + .complete = mv_cesa_ahash_complete, }; static int mv_cesa_ahash_init(struct ahash_request *req, -- cgit v1.2.3 From 2786cee8e50bb4b4303dc22665f391b72318fa84 Mon Sep 17 00:00:00 2001 From: Romain Perier Date: Tue, 21 Jun 2016 10:08:37 +0200 Subject: crypto: marvell - Move SRAM I/O operations to step functions Currently the crypto requests were sent to engines sequentially. This commit moves the SRAM I/O operations from the prepare to the step functions. It provides flexibility for future works and allow to prepare a request while the engine is running. Signed-off-by: Romain Perier Acked-by: Boris Brezillon Signed-off-by: Herbert Xu --- drivers/crypto/marvell/cipher.c | 6 +++--- drivers/crypto/marvell/hash.c | 18 +++++++++--------- 2 files changed, 12 insertions(+), 12 deletions(-) (limited to 'drivers/crypto') diff --git a/drivers/crypto/marvell/cipher.c b/drivers/crypto/marvell/cipher.c index f994469feeb9..1e9c722b1bb0 100644 --- a/drivers/crypto/marvell/cipher.c +++ b/drivers/crypto/marvell/cipher.c @@ -89,6 +89,9 @@ static void mv_cesa_ablkcipher_std_step(struct ablkcipher_request *req) size_t len = min_t(size_t, req->nbytes - sreq->offset, CESA_SA_SRAM_PAYLOAD_SIZE); + mv_cesa_adjust_op(engine, &sreq->op); + memcpy_toio(engine->sram, &sreq->op, sizeof(sreq->op)); + len = sg_pcopy_to_buffer(req->src, creq->src_nents, engine->sram + CESA_SA_DATA_SRAM_OFFSET, len, sreq->offset); @@ -177,12 +180,9 @@ mv_cesa_ablkcipher_std_prepare(struct ablkcipher_request *req) { struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(req); struct mv_cesa_ablkcipher_std_req *sreq = &creq->std; - struct mv_cesa_engine *engine = creq->base.engine; sreq->size = 0; sreq->offset = 0; - mv_cesa_adjust_op(engine, &sreq->op); - memcpy_toio(engine->sram, &sreq->op, sizeof(sreq->op)); } static inline void mv_cesa_ablkcipher_prepare(struct crypto_async_request *req, diff --git a/drivers/crypto/marvell/hash.c b/drivers/crypto/marvell/hash.c index faa677a3e881..4146757d34eb 100644 --- a/drivers/crypto/marvell/hash.c +++ b/drivers/crypto/marvell/hash.c @@ -162,6 +162,15 @@ static void mv_cesa_ahash_std_step(struct ahash_request *req) unsigned int new_cache_ptr = 0; u32 frag_mode; size_t len; + unsigned int digsize; + int i; + + mv_cesa_adjust_op(engine, &creq->op_tmpl); + memcpy_toio(engine->sram, &creq->op_tmpl, sizeof(creq->op_tmpl)); + + digsize = crypto_ahash_digestsize(crypto_ahash_reqtfm(req)); + for (i = 0; i < digsize / 4; i++) + writel_relaxed(creq->state[i], engine->regs + CESA_IVDIG(i)); if (creq->cache_ptr) memcpy_toio(engine->sram + CESA_SA_DATA_SRAM_OFFSET, @@ -265,11 +274,8 @@ static void mv_cesa_ahash_std_prepare(struct ahash_request *req) { struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); struct mv_cesa_ahash_std_req *sreq = &creq->req.std; - struct mv_cesa_engine *engine = creq->base.engine; sreq->offset = 0; - mv_cesa_adjust_op(engine, &creq->op_tmpl); - memcpy_toio(engine->sram, &creq->op_tmpl, sizeof(creq->op_tmpl)); } static void mv_cesa_ahash_step(struct crypto_async_request *req) @@ -336,8 +342,6 @@ static void mv_cesa_ahash_prepare(struct crypto_async_request *req, { struct ahash_request *ahashreq = ahash_request_cast(req); struct mv_cesa_ahash_req *creq = ahash_request_ctx(ahashreq); - unsigned int digsize; - int i; creq->base.engine = engine; @@ -345,10 +349,6 @@ static void mv_cesa_ahash_prepare(struct crypto_async_request *req, mv_cesa_ahash_dma_prepare(ahashreq); else mv_cesa_ahash_std_prepare(ahashreq); - - digsize = crypto_ahash_digestsize(crypto_ahash_reqtfm(ahashreq)); - for (i = 0; i < digsize / 4; i++) - writel_relaxed(creq->state[i], engine->regs + CESA_IVDIG(i)); } static void mv_cesa_ahash_req_cleanup(struct crypto_async_request *req) -- cgit v1.2.3 From bf8f91e711926c1fb57629338e30164ecfba9700 Mon Sep 17 00:00:00 2001 From: Romain Perier Date: Tue, 21 Jun 2016 10:08:38 +0200 Subject: crypto: marvell - Add load balancing between engines This commits adds support for fine grained load balancing on multi-engine IPs. The engine is pre-selected based on its current load and on the weight of the crypto request that is about to be processed. The global crypto queue is also moved to each engine. These changes are required to allow chaining crypto requests at the DMA level. By using a crypto queue per engine, we make sure that we keep the state of the tdma chain synchronized with the crypto queue. We also reduce contention on 'cesa_dev->lock' and improve parallelism. Signed-off-by: Romain Perier Acked-by: Boris Brezillon Signed-off-by: Herbert Xu --- drivers/crypto/marvell/cesa.c | 34 ++++++++++++------------ drivers/crypto/marvell/cesa.h | 29 +++++++++++++++++---- drivers/crypto/marvell/cipher.c | 57 ++++++++++++++++++----------------------- drivers/crypto/marvell/hash.c | 50 ++++++++++++++---------------------- 4 files changed, 84 insertions(+), 86 deletions(-) (limited to 'drivers/crypto') diff --git a/drivers/crypto/marvell/cesa.c b/drivers/crypto/marvell/cesa.c index 40d4add19947..26624f176c39 100644 --- a/drivers/crypto/marvell/cesa.c +++ b/drivers/crypto/marvell/cesa.c @@ -40,16 +40,14 @@ MODULE_PARM_DESC(allhwsupport, "Enable support for all hardware (even it if over struct mv_cesa_dev *cesa_dev; -static void mv_cesa_dequeue_req_unlocked(struct mv_cesa_engine *engine) +static void mv_cesa_dequeue_req_locked(struct mv_cesa_engine *engine) { struct crypto_async_request *req, *backlog; struct mv_cesa_ctx *ctx; - spin_lock_bh(&cesa_dev->lock); - backlog = crypto_get_backlog(&cesa_dev->queue); - req = crypto_dequeue_request(&cesa_dev->queue); + backlog = crypto_get_backlog(&engine->queue); + req = crypto_dequeue_request(&engine->queue); engine->req = req; - spin_unlock_bh(&cesa_dev->lock); if (!req) return; @@ -58,7 +56,6 @@ static void mv_cesa_dequeue_req_unlocked(struct mv_cesa_engine *engine) backlog->complete(backlog, -EINPROGRESS); ctx = crypto_tfm_ctx(req->tfm); - ctx->ops->prepare(req, engine); ctx->ops->step(req); } @@ -96,7 +93,7 @@ static irqreturn_t mv_cesa_int(int irq, void *priv) if (res != -EINPROGRESS) { spin_lock_bh(&engine->lock); engine->req = NULL; - mv_cesa_dequeue_req_unlocked(engine); + mv_cesa_dequeue_req_locked(engine); spin_unlock_bh(&engine->lock); ctx->ops->complete(req); ctx->ops->cleanup(req); @@ -116,21 +113,19 @@ int mv_cesa_queue_req(struct crypto_async_request *req, struct mv_cesa_req *creq) { int ret; - int i; + struct mv_cesa_engine *engine = creq->engine; - spin_lock_bh(&cesa_dev->lock); - ret = crypto_enqueue_request(&cesa_dev->queue, req); - spin_unlock_bh(&cesa_dev->lock); + spin_lock_bh(&engine->lock); + ret = crypto_enqueue_request(&engine->queue, req); + spin_unlock_bh(&engine->lock); if (ret != -EINPROGRESS) return ret; - for (i = 0; i < cesa_dev->caps->nengines; i++) { - spin_lock_bh(&cesa_dev->engines[i].lock); - if (!cesa_dev->engines[i].req) - mv_cesa_dequeue_req_unlocked(&cesa_dev->engines[i]); - spin_unlock_bh(&cesa_dev->engines[i].lock); - } + spin_lock_bh(&engine->lock); + if (!engine->req) + mv_cesa_dequeue_req_locked(engine); + spin_unlock_bh(&engine->lock); return -EINPROGRESS; } @@ -425,7 +420,7 @@ static int mv_cesa_probe(struct platform_device *pdev) return -ENOMEM; spin_lock_init(&cesa->lock); - crypto_init_queue(&cesa->queue, CESA_CRYPTO_DEFAULT_MAX_QLEN); + res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "regs"); cesa->regs = devm_ioremap_resource(dev, res); if (IS_ERR(cesa->regs)) @@ -498,6 +493,9 @@ static int mv_cesa_probe(struct platform_device *pdev) engine); if (ret) goto err_cleanup; + + crypto_init_queue(&engine->queue, CESA_CRYPTO_DEFAULT_MAX_QLEN); + atomic_set(&engine->load, 0); } cesa_dev = cesa; diff --git a/drivers/crypto/marvell/cesa.h b/drivers/crypto/marvell/cesa.h index d7493351f14b..8d600682ca47 100644 --- a/drivers/crypto/marvell/cesa.h +++ b/drivers/crypto/marvell/cesa.h @@ -400,7 +400,6 @@ struct mv_cesa_dev_dma { * @regs: device registers * @sram_size: usable SRAM size * @lock: device lock - * @queue: crypto request queue * @engines: array of engines * @dma: dma pools * @@ -412,7 +411,6 @@ struct mv_cesa_dev { struct device *dev; unsigned int sram_size; spinlock_t lock; - struct crypto_queue queue; struct mv_cesa_engine *engines; struct mv_cesa_dev_dma *dma; }; @@ -431,6 +429,8 @@ struct mv_cesa_dev { * @int_mask: interrupt mask cache * @pool: memory pool pointing to the memory region reserved in * SRAM + * @queue: fifo of the pending crypto requests + * @load: engine load counter, useful for load balancing * * Structure storing CESA engine information. */ @@ -446,11 +446,12 @@ struct mv_cesa_engine { size_t max_req_len; u32 int_mask; struct gen_pool *pool; + struct crypto_queue queue; + atomic_t load; }; /** * struct mv_cesa_req_ops - CESA request operations - * @prepare: prepare a request to be executed on the specified engine * @process: process a request chunk result (should return 0 if the * operation, -EINPROGRESS if it needs more steps or an error * code) @@ -460,8 +461,6 @@ struct mv_cesa_engine { * needed. */ struct mv_cesa_req_ops { - void (*prepare)(struct crypto_async_request *req, - struct mv_cesa_engine *engine); int (*process)(struct crypto_async_request *req, u32 status); void (*step)(struct crypto_async_request *req); void (*cleanup)(struct crypto_async_request *req); @@ -690,6 +689,26 @@ static inline bool mv_cesa_mac_op_is_first_frag(const struct mv_cesa_op_ctx *op) int mv_cesa_queue_req(struct crypto_async_request *req, struct mv_cesa_req *creq); +static inline struct mv_cesa_engine *mv_cesa_select_engine(int weight) +{ + int i; + u32 min_load = U32_MAX; + struct mv_cesa_engine *selected = NULL; + + for (i = 0; i < cesa_dev->caps->nengines; i++) { + struct mv_cesa_engine *engine = cesa_dev->engines + i; + u32 load = atomic_read(&engine->load); + if (load < min_load) { + min_load = load; + selected = engine; + } + } + + atomic_add(weight, &selected->load); + + return selected; +} + /* * Helper function that indicates whether a crypto request needs to be * cleaned up or not after being enqueued using mv_cesa_queue_req(). diff --git a/drivers/crypto/marvell/cipher.c b/drivers/crypto/marvell/cipher.c index 1e9c722b1bb0..3eb607cda269 100644 --- a/drivers/crypto/marvell/cipher.c +++ b/drivers/crypto/marvell/cipher.c @@ -214,6 +214,7 @@ mv_cesa_ablkcipher_complete(struct crypto_async_request *req) struct mv_cesa_engine *engine = creq->base.engine; unsigned int ivsize; + atomic_sub(ablkreq->nbytes, &engine->load); ivsize = crypto_ablkcipher_ivsize(crypto_ablkcipher_reqtfm(ablkreq)); if (mv_cesa_req_get_type(&creq->base) == CESA_DMA_REQ) { @@ -231,7 +232,6 @@ mv_cesa_ablkcipher_complete(struct crypto_async_request *req) static const struct mv_cesa_req_ops mv_cesa_ablkcipher_req_ops = { .step = mv_cesa_ablkcipher_step, .process = mv_cesa_ablkcipher_process, - .prepare = mv_cesa_ablkcipher_prepare, .cleanup = mv_cesa_ablkcipher_req_cleanup, .complete = mv_cesa_ablkcipher_complete, }; @@ -456,29 +456,41 @@ static int mv_cesa_ablkcipher_req_init(struct ablkcipher_request *req, return ret; } -static int mv_cesa_des_op(struct ablkcipher_request *req, - struct mv_cesa_op_ctx *tmpl) +static int mv_cesa_ablkcipher_queue_req(struct ablkcipher_request *req, + struct mv_cesa_op_ctx *tmpl) { - struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(req); - struct mv_cesa_des_ctx *ctx = crypto_tfm_ctx(req->base.tfm); int ret; - - mv_cesa_update_op_cfg(tmpl, CESA_SA_DESC_CFG_CRYPTM_DES, - CESA_SA_DESC_CFG_CRYPTM_MSK); - - memcpy(tmpl->ctx.blkcipher.key, ctx->key, DES_KEY_SIZE); + struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(req); + struct mv_cesa_engine *engine; ret = mv_cesa_ablkcipher_req_init(req, tmpl); if (ret) return ret; + engine = mv_cesa_select_engine(req->nbytes); + mv_cesa_ablkcipher_prepare(&req->base, engine); + ret = mv_cesa_queue_req(&req->base, &creq->base); + if (mv_cesa_req_needs_cleanup(&req->base, ret)) mv_cesa_ablkcipher_cleanup(req); return ret; } +static int mv_cesa_des_op(struct ablkcipher_request *req, + struct mv_cesa_op_ctx *tmpl) +{ + struct mv_cesa_des_ctx *ctx = crypto_tfm_ctx(req->base.tfm); + + mv_cesa_update_op_cfg(tmpl, CESA_SA_DESC_CFG_CRYPTM_DES, + CESA_SA_DESC_CFG_CRYPTM_MSK); + + memcpy(tmpl->ctx.blkcipher.key, ctx->key, DES_KEY_SIZE); + + return mv_cesa_ablkcipher_queue_req(req, tmpl); +} + static int mv_cesa_ecb_des_encrypt(struct ablkcipher_request *req) { struct mv_cesa_op_ctx tmpl; @@ -580,24 +592,14 @@ struct crypto_alg mv_cesa_cbc_des_alg = { static int mv_cesa_des3_op(struct ablkcipher_request *req, struct mv_cesa_op_ctx *tmpl) { - struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(req); struct mv_cesa_des3_ctx *ctx = crypto_tfm_ctx(req->base.tfm); - int ret; mv_cesa_update_op_cfg(tmpl, CESA_SA_DESC_CFG_CRYPTM_3DES, CESA_SA_DESC_CFG_CRYPTM_MSK); memcpy(tmpl->ctx.blkcipher.key, ctx->key, DES3_EDE_KEY_SIZE); - ret = mv_cesa_ablkcipher_req_init(req, tmpl); - if (ret) - return ret; - - ret = mv_cesa_queue_req(&req->base, &creq->base); - if (mv_cesa_req_needs_cleanup(&req->base, ret)) - mv_cesa_ablkcipher_cleanup(req); - - return ret; + return mv_cesa_ablkcipher_queue_req(req, tmpl); } static int mv_cesa_ecb_des3_ede_encrypt(struct ablkcipher_request *req) @@ -707,9 +709,8 @@ struct crypto_alg mv_cesa_cbc_des3_ede_alg = { static int mv_cesa_aes_op(struct ablkcipher_request *req, struct mv_cesa_op_ctx *tmpl) { - struct mv_cesa_ablkcipher_req *creq = ablkcipher_request_ctx(req); struct mv_cesa_aes_ctx *ctx = crypto_tfm_ctx(req->base.tfm); - int ret, i; + int i; u32 *key; u32 cfg; @@ -732,15 +733,7 @@ static int mv_cesa_aes_op(struct ablkcipher_request *req, CESA_SA_DESC_CFG_CRYPTM_MSK | CESA_SA_DESC_CFG_AES_LEN_MSK); - ret = mv_cesa_ablkcipher_req_init(req, tmpl); - if (ret) - return ret; - - ret = mv_cesa_queue_req(&req->base, &creq->base); - if (mv_cesa_req_needs_cleanup(&req->base, ret)) - mv_cesa_ablkcipher_cleanup(req); - - return ret; + return mv_cesa_ablkcipher_queue_req(req, tmpl); } static int mv_cesa_ecb_aes_encrypt(struct ablkcipher_request *req) diff --git a/drivers/crypto/marvell/hash.c b/drivers/crypto/marvell/hash.c index 4146757d34eb..638b2b7eae99 100644 --- a/drivers/crypto/marvell/hash.c +++ b/drivers/crypto/marvell/hash.c @@ -335,6 +335,8 @@ static void mv_cesa_ahash_complete(struct crypto_async_request *req) result[i] = cpu_to_be32(creq->state[i]); } } + + atomic_sub(ahashreq->nbytes, &engine->load); } static void mv_cesa_ahash_prepare(struct crypto_async_request *req, @@ -365,7 +367,6 @@ static void mv_cesa_ahash_req_cleanup(struct crypto_async_request *req) static const struct mv_cesa_req_ops mv_cesa_ahash_req_ops = { .step = mv_cesa_ahash_step, .process = mv_cesa_ahash_process, - .prepare = mv_cesa_ahash_prepare, .cleanup = mv_cesa_ahash_req_cleanup, .complete = mv_cesa_ahash_complete, }; @@ -682,13 +683,13 @@ static int mv_cesa_ahash_req_init(struct ahash_request *req, bool *cached) return ret; } -static int mv_cesa_ahash_update(struct ahash_request *req) +static int mv_cesa_ahash_queue_req(struct ahash_request *req) { struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); + struct mv_cesa_engine *engine; bool cached = false; int ret; - creq->len += req->nbytes; ret = mv_cesa_ahash_req_init(req, &cached); if (ret) return ret; @@ -696,61 +697,48 @@ static int mv_cesa_ahash_update(struct ahash_request *req) if (cached) return 0; + engine = mv_cesa_select_engine(req->nbytes); + mv_cesa_ahash_prepare(&req->base, engine); + ret = mv_cesa_queue_req(&req->base, &creq->base); + if (mv_cesa_req_needs_cleanup(&req->base, ret)) mv_cesa_ahash_cleanup(req); return ret; } +static int mv_cesa_ahash_update(struct ahash_request *req) +{ + struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); + + creq->len += req->nbytes; + + return mv_cesa_ahash_queue_req(req); +} + static int mv_cesa_ahash_final(struct ahash_request *req) { struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); struct mv_cesa_op_ctx *tmpl = &creq->op_tmpl; - bool cached = false; - int ret; mv_cesa_set_mac_op_total_len(tmpl, creq->len); creq->last_req = true; req->nbytes = 0; - ret = mv_cesa_ahash_req_init(req, &cached); - if (ret) - return ret; - - if (cached) - return 0; - - ret = mv_cesa_queue_req(&req->base, &creq->base); - if (mv_cesa_req_needs_cleanup(&req->base, ret)) - mv_cesa_ahash_cleanup(req); - - return ret; + return mv_cesa_ahash_queue_req(req); } static int mv_cesa_ahash_finup(struct ahash_request *req) { struct mv_cesa_ahash_req *creq = ahash_request_ctx(req); struct mv_cesa_op_ctx *tmpl = &creq->op_tmpl; - bool cached = false; - int ret; creq->len += req->nbytes; mv_cesa_set_mac_op_total_len(tmpl, creq->len); creq->last_req = true; - ret = mv_cesa_ahash_req_init(req, &cached); - if (ret) - return ret; - - if (cached) - return 0; - - ret = mv_cesa_queue_req(&req->base, &creq->base); - if (mv_cesa_req_needs_cleanup(&req->base, ret)) - mv_cesa_ahash_cleanup(req); - - return ret; + return mv_cesa_ahash_queue_req(req); } static int mv_cesa_ahash_export(struct ahash_request *req, void *hash, -- cgit v1.2.3 From 85030c5168f1df03a164d47254cc785331f1dfe2 Mon Sep 17 00:00:00 2001 From: Romain Perier Date: Tue, 21 Jun 2016 10:08:39 +0200 Subject: crypto: marvell - Add support for chaining crypto requests in TDMA mode The Cryptographic Engines and Security Accelerators (CESA) supports the Multi-Packet Chain Mode. With this mode enabled, multiple tdma requests can be chained and processed by the hardware without software intervention. This mode was already activated, however the crypto requests were not chained together. By doing so, we reduce significantly the number of IRQs. Instead of being interrupted at the end of each crypto request, we are interrupted at the end of the last cryptographic request processed by the engine. This commits re-factorizes the code, changes the code architecture and adds the required data structures to chain cryptographic requests together before sending them to an engine (stopped or possibly already running). Signed-off-by: Romain Perier Acked-by: Boris Brezillon Signed-off-by: Herbert Xu --- drivers/crypto/marvell/cesa.c | 115 +++++++++++++++++++++++++++++++--------- drivers/crypto/marvell/cesa.h | 39 +++++++++++++- drivers/crypto/marvell/cipher.c | 2 +- drivers/crypto/marvell/hash.c | 6 +++ drivers/crypto/marvell/tdma.c | 86 ++++++++++++++++++++++++++++++ 5 files changed, 221 insertions(+), 27 deletions(-) (limited to 'drivers/crypto') diff --git a/drivers/crypto/marvell/cesa.c b/drivers/crypto/marvell/cesa.c index 26624f176c39..218c497f2ddb 100644 --- a/drivers/crypto/marvell/cesa.c +++ b/drivers/crypto/marvell/cesa.c @@ -40,14 +40,33 @@ MODULE_PARM_DESC(allhwsupport, "Enable support for all hardware (even it if over struct mv_cesa_dev *cesa_dev; -static void mv_cesa_dequeue_req_locked(struct mv_cesa_engine *engine) +struct crypto_async_request * +mv_cesa_dequeue_req_locked(struct mv_cesa_engine *engine, + struct crypto_async_request **backlog) { - struct crypto_async_request *req, *backlog; - struct mv_cesa_ctx *ctx; + struct crypto_async_request *req; - backlog = crypto_get_backlog(&engine->queue); + *backlog = crypto_get_backlog(&engine->queue); req = crypto_dequeue_request(&engine->queue); - engine->req = req; + + if (!req) + return NULL; + + return req; +} + +static void mv_cesa_rearm_engine(struct mv_cesa_engine *engine) +{ + struct crypto_async_request *req = NULL, *backlog = NULL; + struct mv_cesa_ctx *ctx; + + + spin_lock_bh(&engine->lock); + if (!engine->req) { + req = mv_cesa_dequeue_req_locked(engine, &backlog); + engine->req = req; + } + spin_unlock_bh(&engine->lock); if (!req) return; @@ -57,6 +76,46 @@ static void mv_cesa_dequeue_req_locked(struct mv_cesa_engine *engine) ctx = crypto_tfm_ctx(req->tfm); ctx->ops->step(req); + + return; +} + +static int mv_cesa_std_process(struct mv_cesa_engine *engine, u32 status) +{ + struct crypto_async_request *req; + struct mv_cesa_ctx *ctx; + int res; + + req = engine->req; + ctx = crypto_tfm_ctx(req->tfm); + res = ctx->ops->process(req, status); + + if (res == 0) { + ctx->ops->complete(req); + mv_cesa_engine_enqueue_complete_request(engine, req); + } else if (res == -EINPROGRESS) { + ctx->ops->step(req); + } + + return res; +} + +static int mv_cesa_int_process(struct mv_cesa_engine *engine, u32 status) +{ + if (engine->chain.first && engine->chain.last) + return mv_cesa_tdma_process(engine, status); + + return mv_cesa_std_process(engine, status); +} + +static inline void +mv_cesa_complete_req(struct mv_cesa_ctx *ctx, struct crypto_async_request *req, + int res) +{ + ctx->ops->cleanup(req); + local_bh_disable(); + req->complete(req, res); + local_bh_enable(); } static irqreturn_t mv_cesa_int(int irq, void *priv) @@ -83,26 +142,31 @@ static irqreturn_t mv_cesa_int(int irq, void *priv) writel(~status, engine->regs + CESA_SA_FPGA_INT_STATUS); writel(~status, engine->regs + CESA_SA_INT_STATUS); + /* Process fetched requests */ + res = mv_cesa_int_process(engine, status & mask); ret = IRQ_HANDLED; + spin_lock_bh(&engine->lock); req = engine->req; + if (res != -EINPROGRESS) + engine->req = NULL; spin_unlock_bh(&engine->lock); - if (req) { - ctx = crypto_tfm_ctx(req->tfm); - res = ctx->ops->process(req, status & mask); - if (res != -EINPROGRESS) { - spin_lock_bh(&engine->lock); - engine->req = NULL; - mv_cesa_dequeue_req_locked(engine); - spin_unlock_bh(&engine->lock); - ctx->ops->complete(req); - ctx->ops->cleanup(req); - local_bh_disable(); - req->complete(req, res); - local_bh_enable(); - } else { - ctx->ops->step(req); - } + + ctx = crypto_tfm_ctx(req->tfm); + + if (res && res != -EINPROGRESS) + mv_cesa_complete_req(ctx, req, res); + + /* Launch the next pending request */ + mv_cesa_rearm_engine(engine); + + /* Iterate over the complete queue */ + while (true) { + req = mv_cesa_engine_dequeue_complete_request(engine); + if (!req) + break; + + mv_cesa_complete_req(ctx, req, 0); } } @@ -116,16 +180,16 @@ int mv_cesa_queue_req(struct crypto_async_request *req, struct mv_cesa_engine *engine = creq->engine; spin_lock_bh(&engine->lock); + if (mv_cesa_req_get_type(creq) == CESA_DMA_REQ) + mv_cesa_tdma_chain(engine, creq); + ret = crypto_enqueue_request(&engine->queue, req); spin_unlock_bh(&engine->lock); if (ret != -EINPROGRESS) return ret; - spin_lock_bh(&engine->lock); - if (!engine->req) - mv_cesa_dequeue_req_locked(engine); - spin_unlock_bh(&engine->lock); + mv_cesa_rearm_engine(engine); return -EINPROGRESS; } @@ -496,6 +560,7 @@ static int mv_cesa_probe(struct platform_device *pdev) crypto_init_queue(&engine->queue, CESA_CRYPTO_DEFAULT_MAX_QLEN); atomic_set(&engine->load, 0); + INIT_LIST_HEAD(&engine->complete_queue); } cesa_dev = cesa; diff --git a/drivers/crypto/marvell/cesa.h b/drivers/crypto/marvell/cesa.h index 8d600682ca47..e423d33decd4 100644 --- a/drivers/crypto/marvell/cesa.h +++ b/drivers/crypto/marvell/cesa.h @@ -271,7 +271,9 @@ struct mv_cesa_op_ctx { /* TDMA descriptor flags */ #define CESA_TDMA_DST_IN_SRAM BIT(31) #define CESA_TDMA_SRC_IN_SRAM BIT(30) -#define CESA_TDMA_TYPE_MSK GENMASK(29, 0) +#define CESA_TDMA_END_OF_REQ BIT(29) +#define CESA_TDMA_BREAK_CHAIN BIT(28) +#define CESA_TDMA_TYPE_MSK GENMASK(27, 0) #define CESA_TDMA_DUMMY 0 #define CESA_TDMA_DATA 1 #define CESA_TDMA_OP 2 @@ -431,6 +433,9 @@ struct mv_cesa_dev { * SRAM * @queue: fifo of the pending crypto requests * @load: engine load counter, useful for load balancing + * @chain: list of the current tdma descriptors being processed + * by this engine. + * @complete_queue: fifo of the processed requests by the engine * * Structure storing CESA engine information. */ @@ -448,6 +453,8 @@ struct mv_cesa_engine { struct gen_pool *pool; struct crypto_queue queue; atomic_t load; + struct mv_cesa_tdma_chain chain; + struct list_head complete_queue; }; /** @@ -608,6 +615,29 @@ struct mv_cesa_ahash_req { extern struct mv_cesa_dev *cesa_dev; + +static inline void +mv_cesa_engine_enqueue_complete_request(struct mv_cesa_engine *engine, + struct crypto_async_request *req) +{ + list_add_tail(&req->list, &engine->complete_queue); +} + +static inline struct crypto_async_request * +mv_cesa_engine_dequeue_complete_request(struct mv_cesa_engine *engine) +{ + struct crypto_async_request *req; + + req = list_first_entry_or_null(&engine->complete_queue, + struct crypto_async_request, + list); + if (req) + list_del(&req->list); + + return req; +} + + static inline enum mv_cesa_req_type mv_cesa_req_get_type(struct mv_cesa_req *req) { @@ -689,6 +719,10 @@ static inline bool mv_cesa_mac_op_is_first_frag(const struct mv_cesa_op_ctx *op) int mv_cesa_queue_req(struct crypto_async_request *req, struct mv_cesa_req *creq); +struct crypto_async_request * +mv_cesa_dequeue_req_locked(struct mv_cesa_engine *engine, + struct crypto_async_request **backlog); + static inline struct mv_cesa_engine *mv_cesa_select_engine(int weight) { int i; @@ -794,6 +828,9 @@ static inline int mv_cesa_dma_process(struct mv_cesa_req *dreq, void mv_cesa_dma_prepare(struct mv_cesa_req *dreq, struct mv_cesa_engine *engine); void mv_cesa_dma_cleanup(struct mv_cesa_req *dreq); +void mv_cesa_tdma_chain(struct mv_cesa_engine *engine, + struct mv_cesa_req *dreq); +int mv_cesa_tdma_process(struct mv_cesa_engine *engine, u32 status); static inline void diff --git a/drivers/crypto/marvell/cipher.c b/drivers/crypto/marvell/cipher.c index 3eb607cda269..48df03a06066 100644 --- a/drivers/crypto/marvell/cipher.c +++ b/drivers/crypto/marvell/cipher.c @@ -390,6 +390,7 @@ static int mv_cesa_ablkcipher_dma_req_init(struct ablkcipher_request *req, goto err_free_tdma; basereq->chain = chain; + basereq->chain.last->flags |= CESA_TDMA_END_OF_REQ; return 0; @@ -447,7 +448,6 @@ static int mv_cesa_ablkcipher_req_init(struct ablkcipher_request *req, mv_cesa_update_op_cfg(tmpl, CESA_SA_DESC_CFG_OP_CRYPT_ONLY, CESA_SA_DESC_CFG_OP_MSK); - /* TODO: add a threshold for DMA usage */ if (cesa_dev->caps->has_tdma) ret = mv_cesa_ablkcipher_dma_req_init(req, tmpl); else diff --git a/drivers/crypto/marvell/hash.c b/drivers/crypto/marvell/hash.c index 638b2b7eae99..c35912b4fffb 100644 --- a/drivers/crypto/marvell/hash.c +++ b/drivers/crypto/marvell/hash.c @@ -172,6 +172,9 @@ static void mv_cesa_ahash_std_step(struct ahash_request *req) for (i = 0; i < digsize / 4; i++) writel_relaxed(creq->state[i], engine->regs + CESA_IVDIG(i)); + mv_cesa_adjust_op(engine, &creq->op_tmpl); + memcpy_toio(engine->sram, &creq->op_tmpl, sizeof(creq->op_tmpl)); + if (creq->cache_ptr) memcpy_toio(engine->sram + CESA_SA_DATA_SRAM_OFFSET, creq->cache, creq->cache_ptr); @@ -647,6 +650,9 @@ static int mv_cesa_ahash_dma_req_init(struct ahash_request *req) else creq->cache_ptr = 0; + basereq->chain.last->flags |= (CESA_TDMA_END_OF_REQ | + CESA_TDMA_BREAK_CHAIN); + return 0; err_free_tdma: diff --git a/drivers/crypto/marvell/tdma.c b/drivers/crypto/marvell/tdma.c index f9b8d2cfa1bc..b0f9f5050dc8 100644 --- a/drivers/crypto/marvell/tdma.c +++ b/drivers/crypto/marvell/tdma.c @@ -99,6 +99,92 @@ void mv_cesa_dma_prepare(struct mv_cesa_req *dreq, } } +void mv_cesa_tdma_chain(struct mv_cesa_engine *engine, + struct mv_cesa_req *dreq) +{ + if (engine->chain.first == NULL && engine->chain.last == NULL) { + engine->chain.first = dreq->chain.first; + engine->chain.last = dreq->chain.last; + } else { + struct mv_cesa_tdma_desc *last; + + last = engine->chain.last; + last->next = dreq->chain.first; + engine->chain.last = dreq->chain.last; + + if (!(last->flags & CESA_TDMA_BREAK_CHAIN)) + last->next_dma = dreq->chain.first->cur_dma; + } +} + +int mv_cesa_tdma_process(struct mv_cesa_engine *engine, u32 status) +{ + struct crypto_async_request *req = NULL; + struct mv_cesa_tdma_desc *tdma = NULL, *next = NULL; + dma_addr_t tdma_cur; + int res = 0; + + tdma_cur = readl(engine->regs + CESA_TDMA_CUR); + + for (tdma = engine->chain.first; tdma; tdma = next) { + spin_lock_bh(&engine->lock); + next = tdma->next; + spin_unlock_bh(&engine->lock); + + if (tdma->flags & CESA_TDMA_END_OF_REQ) { + struct crypto_async_request *backlog = NULL; + struct mv_cesa_ctx *ctx; + u32 current_status; + + spin_lock_bh(&engine->lock); + /* + * if req is NULL, this means we're processing the + * request in engine->req. + */ + if (!req) + req = engine->req; + else + req = mv_cesa_dequeue_req_locked(engine, + &backlog); + + /* Re-chaining to the next request */ + engine->chain.first = tdma->next; + tdma->next = NULL; + + /* If this is the last request, clear the chain */ + if (engine->chain.first == NULL) + engine->chain.last = NULL; + spin_unlock_bh(&engine->lock); + + ctx = crypto_tfm_ctx(req->tfm); + current_status = (tdma->cur_dma == tdma_cur) ? + status : CESA_SA_INT_ACC0_IDMA_DONE; + res = ctx->ops->process(req, current_status); + ctx->ops->complete(req); + + if (res == 0) + mv_cesa_engine_enqueue_complete_request(engine, + req); + + if (backlog) + backlog->complete(backlog, -EINPROGRESS); + } + + if (res || tdma->cur_dma == tdma_cur) + break; + } + + /* Save the last request in error to engine->req, so that the core + * knows which request was fautly */ + if (res) { + spin_lock_bh(&engine->lock); + engine->req = req; + spin_unlock_bh(&engine->lock); + } + + return res; +} + static struct mv_cesa_tdma_desc * mv_cesa_dma_add_desc(struct mv_cesa_tdma_chain *chain, gfp_t flags) { -- cgit v1.2.3 From 47a1f0b2d1d11350ffecbfebc4105116bf6c133e Mon Sep 17 00:00:00 2001 From: Romain Perier Date: Tue, 21 Jun 2016 10:08:40 +0200 Subject: crypto: marvell - Increase the size of the crypto queue Now that crypto requests are chained together at the DMA level, we increase the size of the crypto queue for each engine. The result is that as the backlog list is reached later, it does not stop the crypto stack from sending asychronous requests, so more cryptographic tasks are processed by the engines. Signed-off-by: Romain Perier Acked-by: Boris Brezillon Signed-off-by: Herbert Xu --- drivers/crypto/marvell/cesa.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/crypto') diff --git a/drivers/crypto/marvell/cesa.c b/drivers/crypto/marvell/cesa.c index 218c497f2ddb..e373cc6557c6 100644 --- a/drivers/crypto/marvell/cesa.c +++ b/drivers/crypto/marvell/cesa.c @@ -32,7 +32,7 @@ #include "cesa.h" /* Limit of the crypto queue before reaching the backlog */ -#define CESA_CRYPTO_DEFAULT_MAX_QLEN 50 +#define CESA_CRYPTO_DEFAULT_MAX_QLEN 128 static int allhwsupport = !IS_ENABLED(CONFIG_CRYPTO_DEV_MV_CESA); module_param_named(allhwsupport, allhwsupport, int, 0444); -- cgit v1.2.3 From e93f767bec1c4b58ac24bd59143c0030b9555426 Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Wed, 22 Jun 2016 16:23:34 +0300 Subject: crypto: omap-sham - use runtime_pm autosuspend for clock handling Calling runtime PM API for every block causes serious performance hit to crypto operations that are done on a long buffer. As crypto is performed on a page boundary, encrypting large buffers can cause a series of crypto operations divided by page. The runtime PM API is also called those many times. Convert the driver to use runtime_pm autosuspend instead, with a default timeout value of 1 second. This results in upto ~50% speedup. Signed-off-by: Tero Kristo Signed-off-by: Herbert Xu --- drivers/crypto/omap-sham.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'drivers/crypto') diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c index 63464e86f2b1..887bc32b7993 100644 --- a/drivers/crypto/omap-sham.c +++ b/drivers/crypto/omap-sham.c @@ -100,6 +100,8 @@ #define DEFAULT_TIMEOUT_INTERVAL HZ +#define DEFAULT_AUTOSUSPEND_DELAY 1000 + /* mostly device flags */ #define FLAGS_BUSY 0 #define FLAGS_FINAL 1 @@ -999,7 +1001,8 @@ static void omap_sham_finish_req(struct ahash_request *req, int err) dd->flags &= ~(BIT(FLAGS_BUSY) | BIT(FLAGS_FINAL) | BIT(FLAGS_CPU) | BIT(FLAGS_DMA_READY) | BIT(FLAGS_OUTPUT_READY)); - pm_runtime_put(dd->dev); + pm_runtime_mark_last_busy(dd->dev); + pm_runtime_put_autosuspend(dd->dev); if (req->base.complete) req->base.complete(&req->base, err); @@ -1946,6 +1949,9 @@ static int omap_sham_probe(struct platform_device *pdev) dd->flags |= dd->pdata->flags; + pm_runtime_use_autosuspend(dev); + pm_runtime_set_autosuspend_delay(dev, DEFAULT_AUTOSUSPEND_DELAY); + pm_runtime_enable(dev); pm_runtime_irq_safe(dev); -- cgit v1.2.3 From 65e7a549af295cb2034f17e99211b97e9d02cbee Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Wed, 22 Jun 2016 16:23:35 +0300 Subject: crypto: omap-sham - change queue size from 1 to 10 Change crypto queue size from 1 to 10 for omap SHA driver. This should allow clients to enqueue requests more effectively to avoid serializing whole crypto sequences, giving extra performance. Signed-off-by: Tero Kristo Signed-off-by: Herbert Xu --- drivers/crypto/omap-sham.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/crypto') diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c index 887bc32b7993..8090db0de466 100644 --- a/drivers/crypto/omap-sham.c +++ b/drivers/crypto/omap-sham.c @@ -175,7 +175,7 @@ struct omap_sham_ctx { struct omap_sham_hmac_ctx base[0]; }; -#define OMAP_SHAM_QUEUE_LENGTH 1 +#define OMAP_SHAM_QUEUE_LENGTH 10 struct omap_sham_algs_info { struct ahash_alg *algs_list; -- cgit v1.2.3 From b973eaab68db858cb42f5283b1b0ed6773d8fdd9 Mon Sep 17 00:00:00 2001 From: Lokesh Vutla Date: Wed, 22 Jun 2016 16:23:36 +0300 Subject: crypto: omap - do not call dmaengine_terminate_all The extra call to dmaengine_terminate_all is not needed, as the DMA is not running at this point. This improves performance slightly. Signed-off-by: Lokesh Vutla Signed-off-by: Tero Kristo Signed-off-by: Herbert Xu --- drivers/crypto/omap-aes.c | 2 -- drivers/crypto/omap-sham.c | 1 - 2 files changed, 3 deletions(-) (limited to 'drivers/crypto') diff --git a/drivers/crypto/omap-aes.c b/drivers/crypto/omap-aes.c index 4a0e6a545ba2..8178632de788 100644 --- a/drivers/crypto/omap-aes.c +++ b/drivers/crypto/omap-aes.c @@ -528,8 +528,6 @@ static int omap_aes_crypt_dma_stop(struct omap_aes_dev *dd) omap_aes_dma_stop(dd); - dmaengine_terminate_all(dd->dma_lch_in); - dmaengine_terminate_all(dd->dma_lch_out); return 0; } diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c index 8090db0de466..3321f003f465 100644 --- a/drivers/crypto/omap-sham.c +++ b/drivers/crypto/omap-sham.c @@ -815,7 +815,6 @@ static int omap_sham_update_dma_stop(struct omap_sham_dev *dd) { struct omap_sham_reqctx *ctx = ahash_request_ctx(dd->req); - dmaengine_terminate_all(dd->dma_lch); if (ctx->flags & BIT(FLAGS_SG)) { dma_unmap_sg(dd->dev, ctx->sg, 1, DMA_TO_DEVICE); -- cgit v1.2.3 From 85e0687f8fac9032681b163a17f806b52205922e Mon Sep 17 00:00:00 2001 From: Bin Liu Date: Wed, 22 Jun 2016 16:23:37 +0300 Subject: crypto: omap-sham - set sw fallback to 240 bytes Adds software fallback support for small crypto requests. In these cases, it is undesirable to use DMA, as setting it up itself is rather heavy operation. Gives about 40% extra performance in ipsec usecase. Signed-off-by: Bin Liu [t-kristo@ti.com: dropped the extra traces, updated some comments on the code] Signed-off-by: Tero Kristo Signed-off-by: Herbert Xu --- drivers/crypto/omap-sham.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'drivers/crypto') diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c index 3321f003f465..ae6f841f81ff 100644 --- a/drivers/crypto/omap-sham.c +++ b/drivers/crypto/omap-sham.c @@ -1095,7 +1095,7 @@ static int omap_sham_update(struct ahash_request *req) ctx->offset = 0; if (ctx->flags & BIT(FLAGS_FINUP)) { - if ((ctx->digcnt + ctx->bufcnt + ctx->total) < 9) { + if ((ctx->digcnt + ctx->bufcnt + ctx->total) < 240) { /* * OMAP HW accel works only with buffers >= 9 * will switch to bypass in final() @@ -1151,9 +1151,13 @@ static int omap_sham_final(struct ahash_request *req) if (ctx->flags & BIT(FLAGS_ERROR)) return 0; /* uncompleted hash is not needed */ - /* OMAP HW accel works only with buffers >= 9 */ - /* HMAC is always >= 9 because ipad == block size */ - if ((ctx->digcnt + ctx->bufcnt) < 9) + /* + * OMAP HW accel works only with buffers >= 9. + * HMAC is always >= 9 because ipad == block size. + * If buffersize is less than 240, we use fallback SW encoding, + * as using DMA + HW in this case doesn't provide any benefit. + */ + if ((ctx->digcnt + ctx->bufcnt) < 240) return omap_sham_final_shash(req); else if (ctx->bufcnt) return omap_sham_enqueue(req, OP_FINAL); -- cgit v1.2.3 From 9ac1c3200c79e7b83b881232f40bc9e34b040516 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 27 Jun 2016 11:17:40 +0200 Subject: crypto: ux500 - do not build with -O0 The ARM allmodconfig build currently warngs because of the ux500 crypto driver not working well with the jump label implementation that we started using for dynamic debug, which breaks building with 'gcc -O0': In file included from /git/arm-soc/include/linux/jump_label.h:105:0, from /git/arm-soc/include/linux/dynamic_debug.h:5, from /git/arm-soc/include/linux/printk.h:289, from /git/arm-soc/include/linux/kernel.h:13, from /git/arm-soc/include/linux/clk.h:16, from /git/arm-soc/drivers/crypto/ux500/hash/hash_core.c:16: /git/arm-soc/arch/arm/include/asm/jump_label.h: In function 'hash_set_dma_transfer': /git/arm-soc/arch/arm/include/asm/jump_label.h:13:7: error: asm operand 0 probably doesn't match constraints [-Werror] asm_volatile_goto("1:\n\t" Turning off compiler optimizations has never really been supported here, and it's only used when debugging the driver. I have not found a good reason for doing this here, other than a misguided attempt to produce more readable assembly output. Also, the driver is only used in obsolete hardware that almost certainly nobody will spend time debugging any more. This just removes the -O0 flag from the compiler options. Signed-off-by: Arnd Bergmann Signed-off-by: Herbert Xu --- drivers/crypto/ux500/cryp/Makefile | 6 +++--- drivers/crypto/ux500/hash/Makefile | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/crypto') diff --git a/drivers/crypto/ux500/cryp/Makefile b/drivers/crypto/ux500/cryp/Makefile index e5d362a6f680..b497ae3dde07 100644 --- a/drivers/crypto/ux500/cryp/Makefile +++ b/drivers/crypto/ux500/cryp/Makefile @@ -4,9 +4,9 @@ # * License terms: GNU General Public License (GPL) version 2 */ ifdef CONFIG_CRYPTO_DEV_UX500_DEBUG -CFLAGS_cryp_core.o := -DDEBUG -O0 -CFLAGS_cryp.o := -DDEBUG -O0 -CFLAGS_cryp_irq.o := -DDEBUG -O0 +CFLAGS_cryp_core.o := -DDEBUG +CFLAGS_cryp.o := -DDEBUG +CFLAGS_cryp_irq.o := -DDEBUG endif obj-$(CONFIG_CRYPTO_DEV_UX500_CRYP) += ux500_cryp.o diff --git a/drivers/crypto/ux500/hash/Makefile b/drivers/crypto/ux500/hash/Makefile index b2f90d9bac72..784d9c0a8853 100644 --- a/drivers/crypto/ux500/hash/Makefile +++ b/drivers/crypto/ux500/hash/Makefile @@ -4,7 +4,7 @@ # License terms: GNU General Public License (GPL) version 2 # ifdef CONFIG_CRYPTO_DEV_UX500_DEBUG -CFLAGS_hash_core.o := -DDEBUG -O0 +CFLAGS_hash_core.o := -DDEBUG endif obj-$(CONFIG_CRYPTO_DEV_UX500_HASH) += ux500_hash.o -- cgit v1.2.3 From 241118de58dace0fe24a754b6e2e3cb6f804ad47 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 29 Jun 2016 18:04:01 +0800 Subject: crypto: ccp - Use skcipher for fallback This patch replaces use of the obsolete ablkcipher with skcipher. Signed-off-by: Herbert Xu --- drivers/crypto/ccp/ccp-crypto-aes-xts.c | 43 +++++++++++++++------------------ drivers/crypto/ccp/ccp-crypto.h | 3 +-- 2 files changed, 21 insertions(+), 25 deletions(-) (limited to 'drivers/crypto') diff --git a/drivers/crypto/ccp/ccp-crypto-aes-xts.c b/drivers/crypto/ccp/ccp-crypto-aes-xts.c index 0d0d4529ee36..58a4244b4752 100644 --- a/drivers/crypto/ccp/ccp-crypto-aes-xts.c +++ b/drivers/crypto/ccp/ccp-crypto-aes-xts.c @@ -14,9 +14,8 @@ #include #include #include -#include -#include #include +#include #include #include "ccp-crypto.h" @@ -110,15 +109,12 @@ static int ccp_aes_xts_setkey(struct crypto_ablkcipher *tfm, const u8 *key, ctx->u.aes.key_len = key_len / 2; sg_init_one(&ctx->u.aes.key_sg, ctx->u.aes.key, key_len); - return crypto_ablkcipher_setkey(ctx->u.aes.tfm_ablkcipher, key, - key_len); + return crypto_skcipher_setkey(ctx->u.aes.tfm_skcipher, key, key_len); } static int ccp_aes_xts_crypt(struct ablkcipher_request *req, unsigned int encrypt) { - struct crypto_tfm *tfm = - crypto_ablkcipher_tfm(crypto_ablkcipher_reqtfm(req)); struct ccp_ctx *ctx = crypto_tfm_ctx(req->base.tfm); struct ccp_aes_req_ctx *rctx = ablkcipher_request_ctx(req); unsigned int unit; @@ -146,14 +142,19 @@ static int ccp_aes_xts_crypt(struct ablkcipher_request *req, if ((unit_size == CCP_XTS_AES_UNIT_SIZE__LAST) || (ctx->u.aes.key_len != AES_KEYSIZE_128)) { + SKCIPHER_REQUEST_ON_STACK(subreq, ctx->u.aes.tfm_skcipher); + /* Use the fallback to process the request for any * unsupported unit sizes or key sizes */ - ablkcipher_request_set_tfm(req, ctx->u.aes.tfm_ablkcipher); - ret = (encrypt) ? crypto_ablkcipher_encrypt(req) : - crypto_ablkcipher_decrypt(req); - ablkcipher_request_set_tfm(req, __crypto_ablkcipher_cast(tfm)); - + skcipher_request_set_tfm(subreq, ctx->u.aes.tfm_skcipher); + skcipher_request_set_callback(subreq, req->base.flags, + NULL, NULL); + skcipher_request_set_crypt(subreq, req->src, req->dst, + req->nbytes, req->info); + ret = encrypt ? crypto_skcipher_encrypt(subreq) : + crypto_skcipher_decrypt(subreq); + skcipher_request_zero(subreq); return ret; } @@ -192,23 +193,21 @@ static int ccp_aes_xts_decrypt(struct ablkcipher_request *req) static int ccp_aes_xts_cra_init(struct crypto_tfm *tfm) { struct ccp_ctx *ctx = crypto_tfm_ctx(tfm); - struct crypto_ablkcipher *fallback_tfm; + struct crypto_skcipher *fallback_tfm; ctx->complete = ccp_aes_xts_complete; ctx->u.aes.key_len = 0; - fallback_tfm = crypto_alloc_ablkcipher(crypto_tfm_alg_name(tfm), 0, - CRYPTO_ALG_ASYNC | - CRYPTO_ALG_NEED_FALLBACK); + fallback_tfm = crypto_alloc_skcipher("xts(aes)", 0, + CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK); if (IS_ERR(fallback_tfm)) { - pr_warn("could not load fallback driver %s\n", - crypto_tfm_alg_name(tfm)); + pr_warn("could not load fallback driver xts(aes)\n"); return PTR_ERR(fallback_tfm); } - ctx->u.aes.tfm_ablkcipher = fallback_tfm; + ctx->u.aes.tfm_skcipher = fallback_tfm; - tfm->crt_ablkcipher.reqsize = sizeof(struct ccp_aes_req_ctx) + - fallback_tfm->base.crt_ablkcipher.reqsize; + tfm->crt_ablkcipher.reqsize = sizeof(struct ccp_aes_req_ctx); return 0; } @@ -217,9 +216,7 @@ static void ccp_aes_xts_cra_exit(struct crypto_tfm *tfm) { struct ccp_ctx *ctx = crypto_tfm_ctx(tfm); - if (ctx->u.aes.tfm_ablkcipher) - crypto_free_ablkcipher(ctx->u.aes.tfm_ablkcipher); - ctx->u.aes.tfm_ablkcipher = NULL; + crypto_free_skcipher(ctx->u.aes.tfm_skcipher); } static int ccp_register_aes_xts_alg(struct list_head *head, diff --git a/drivers/crypto/ccp/ccp-crypto.h b/drivers/crypto/ccp/ccp-crypto.h index a326ec20bfa8..8335b32e815e 100644 --- a/drivers/crypto/ccp/ccp-crypto.h +++ b/drivers/crypto/ccp/ccp-crypto.h @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include @@ -69,7 +68,7 @@ static inline struct ccp_crypto_ahash_alg * /***** AES related defines *****/ struct ccp_aes_ctx { /* Fallback cipher for XTS with unsupported unit sizes */ - struct crypto_ablkcipher *tfm_ablkcipher; + struct crypto_skcipher *tfm_skcipher; /* Cipher used to generate CMAC K1/K2 keys */ struct crypto_cipher *tfm_cipher; -- cgit v1.2.3 From 29406bb9239fcf235357b95ae670841e6589b453 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 29 Jun 2016 18:04:02 +0800 Subject: crypto: mxs-dcp - Use skcipher for fallback This patch replaces use of the obsolete ablkcipher with skcipher. Signed-off-by: Herbert Xu --- drivers/crypto/mxs-dcp.c | 47 +++++++++++++++++++++-------------------------- 1 file changed, 21 insertions(+), 26 deletions(-) (limited to 'drivers/crypto') diff --git a/drivers/crypto/mxs-dcp.c b/drivers/crypto/mxs-dcp.c index 59ed54e464a9..625ee50fd78b 100644 --- a/drivers/crypto/mxs-dcp.c +++ b/drivers/crypto/mxs-dcp.c @@ -11,7 +11,6 @@ * http://www.gnu.org/copyleft/gpl.html */ -#include #include #include #include @@ -25,6 +24,7 @@ #include #include #include +#include #define DCP_MAX_CHANS 4 #define DCP_BUF_SZ PAGE_SIZE @@ -84,7 +84,7 @@ struct dcp_async_ctx { unsigned int hot:1; /* Crypto-specific context */ - struct crypto_ablkcipher *fallback; + struct crypto_skcipher *fallback; unsigned int key_len; uint8_t key[AES_KEYSIZE_128]; }; @@ -374,20 +374,22 @@ static int dcp_chan_thread_aes(void *data) static int mxs_dcp_block_fallback(struct ablkcipher_request *req, int enc) { - struct crypto_tfm *tfm = - crypto_ablkcipher_tfm(crypto_ablkcipher_reqtfm(req)); - struct dcp_async_ctx *ctx = crypto_ablkcipher_ctx( - crypto_ablkcipher_reqtfm(req)); + struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req); + struct dcp_async_ctx *ctx = crypto_ablkcipher_ctx(tfm); + SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback); int ret; - ablkcipher_request_set_tfm(req, ctx->fallback); + skcipher_request_set_tfm(subreq, ctx->fallback); + skcipher_request_set_callback(subreq, req->base.flags, NULL, NULL); + skcipher_request_set_crypt(subreq, req->src, req->dst, + req->nbytes, req->info); if (enc) - ret = crypto_ablkcipher_encrypt(req); + ret = crypto_skcipher_encrypt(subreq); else - ret = crypto_ablkcipher_decrypt(req); + ret = crypto_skcipher_decrypt(subreq); - ablkcipher_request_set_tfm(req, __crypto_ablkcipher_cast(tfm)); + skcipher_request_zero(subreq); return ret; } @@ -453,28 +455,22 @@ static int mxs_dcp_aes_setkey(struct crypto_ablkcipher *tfm, const u8 *key, return 0; } - /* Check if the key size is supported by kernel at all. */ - if (len != AES_KEYSIZE_192 && len != AES_KEYSIZE_256) { - tfm->base.crt_flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; - return -EINVAL; - } - /* * If the requested AES key size is not supported by the hardware, * but is supported by in-kernel software implementation, we use * software fallback. */ - actx->fallback->base.crt_flags &= ~CRYPTO_TFM_REQ_MASK; - actx->fallback->base.crt_flags |= - tfm->base.crt_flags & CRYPTO_TFM_REQ_MASK; + crypto_skcipher_clear_flags(actx->fallback, CRYPTO_TFM_REQ_MASK); + crypto_skcipher_set_flags(actx->fallback, + tfm->base.crt_flags & CRYPTO_TFM_REQ_MASK); - ret = crypto_ablkcipher_setkey(actx->fallback, key, len); + ret = crypto_skcipher_setkey(actx->fallback, key, len); if (!ret) return 0; tfm->base.crt_flags &= ~CRYPTO_TFM_RES_MASK; - tfm->base.crt_flags |= - actx->fallback->base.crt_flags & CRYPTO_TFM_RES_MASK; + tfm->base.crt_flags |= crypto_skcipher_get_flags(actx->fallback) & + CRYPTO_TFM_RES_MASK; return ret; } @@ -484,9 +480,9 @@ static int mxs_dcp_aes_fallback_init(struct crypto_tfm *tfm) const char *name = crypto_tfm_alg_name(tfm); const uint32_t flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK; struct dcp_async_ctx *actx = crypto_tfm_ctx(tfm); - struct crypto_ablkcipher *blk; + struct crypto_skcipher *blk; - blk = crypto_alloc_ablkcipher(name, 0, flags); + blk = crypto_alloc_skcipher(name, 0, flags); if (IS_ERR(blk)) return PTR_ERR(blk); @@ -499,8 +495,7 @@ static void mxs_dcp_aes_fallback_exit(struct crypto_tfm *tfm) { struct dcp_async_ctx *actx = crypto_tfm_ctx(tfm); - crypto_free_ablkcipher(actx->fallback); - actx->fallback = NULL; + crypto_free_skcipher(actx->fallback); } /* -- cgit v1.2.3 From 1eb60ff82d822299b8b4fd685d1f85de87706c00 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 29 Jun 2016 18:04:03 +0800 Subject: crypto: picoxcell - Use skcipher for fallback This patch replaces use of the obsolete ablkcipher with skcipher. Signed-off-by: Herbert Xu --- drivers/crypto/picoxcell_crypto.c | 60 ++++++++++++++++++++------------------- 1 file changed, 31 insertions(+), 29 deletions(-) (limited to 'drivers/crypto') diff --git a/drivers/crypto/picoxcell_crypto.c b/drivers/crypto/picoxcell_crypto.c index 3b1c7ecf078f..47576098831f 100644 --- a/drivers/crypto/picoxcell_crypto.c +++ b/drivers/crypto/picoxcell_crypto.c @@ -171,7 +171,7 @@ struct spacc_ablk_ctx { * The fallback cipher. If the operation can't be done in hardware, * fallback to a software version. */ - struct crypto_ablkcipher *sw_cipher; + struct crypto_skcipher *sw_cipher; }; /* AEAD cipher context. */ @@ -789,33 +789,35 @@ static int spacc_aes_setkey(struct crypto_ablkcipher *cipher, const u8 *key, * request for any other size (192 bits) then we need to do a software * fallback. */ - if (len != AES_KEYSIZE_128 && len != AES_KEYSIZE_256 && - ctx->sw_cipher) { + if (len != AES_KEYSIZE_128 && len != AES_KEYSIZE_256) { + if (!ctx->sw_cipher) + return -EINVAL; + /* * Set the fallback transform to use the same request flags as * the hardware transform. */ - ctx->sw_cipher->base.crt_flags &= ~CRYPTO_TFM_REQ_MASK; - ctx->sw_cipher->base.crt_flags |= - cipher->base.crt_flags & CRYPTO_TFM_REQ_MASK; + crypto_skcipher_clear_flags(ctx->sw_cipher, + CRYPTO_TFM_REQ_MASK); + crypto_skcipher_set_flags(ctx->sw_cipher, + cipher->base.crt_flags & + CRYPTO_TFM_REQ_MASK); + + err = crypto_skcipher_setkey(ctx->sw_cipher, key, len); + + tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK; + tfm->crt_flags |= + crypto_skcipher_get_flags(ctx->sw_cipher) & + CRYPTO_TFM_RES_MASK; - err = crypto_ablkcipher_setkey(ctx->sw_cipher, key, len); if (err) goto sw_setkey_failed; - } else if (len != AES_KEYSIZE_128 && len != AES_KEYSIZE_256 && - !ctx->sw_cipher) - err = -EINVAL; + } memcpy(ctx->key, key, len); ctx->key_len = len; sw_setkey_failed: - if (err && ctx->sw_cipher) { - tfm->crt_flags &= ~CRYPTO_TFM_RES_MASK; - tfm->crt_flags |= - ctx->sw_cipher->base.crt_flags & CRYPTO_TFM_RES_MASK; - } - return err; } @@ -910,20 +912,21 @@ static int spacc_ablk_do_fallback(struct ablkcipher_request *req, struct crypto_tfm *old_tfm = crypto_ablkcipher_tfm(crypto_ablkcipher_reqtfm(req)); struct spacc_ablk_ctx *ctx = crypto_tfm_ctx(old_tfm); + SKCIPHER_REQUEST_ON_STACK(subreq, ctx->sw_cipher); int err; - if (!ctx->sw_cipher) - return -EINVAL; - /* * Change the request to use the software fallback transform, and once * the ciphering has completed, put the old transform back into the * request. */ - ablkcipher_request_set_tfm(req, ctx->sw_cipher); - err = is_encrypt ? crypto_ablkcipher_encrypt(req) : - crypto_ablkcipher_decrypt(req); - ablkcipher_request_set_tfm(req, __crypto_ablkcipher_cast(old_tfm)); + skcipher_request_set_tfm(subreq, ctx->sw_cipher); + skcipher_request_set_callback(subreq, req->base.flags, NULL, NULL); + skcipher_request_set_crypt(subreq, req->src, req->dst, + req->nbytes, req->info); + err = is_encrypt ? crypto_skcipher_encrypt(subreq) : + crypto_skcipher_decrypt(subreq); + skcipher_request_zero(subreq); return err; } @@ -1015,12 +1018,13 @@ static int spacc_ablk_cra_init(struct crypto_tfm *tfm) ctx->generic.flags = spacc_alg->type; ctx->generic.engine = engine; if (alg->cra_flags & CRYPTO_ALG_NEED_FALLBACK) { - ctx->sw_cipher = crypto_alloc_ablkcipher(alg->cra_name, 0, - CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK); + ctx->sw_cipher = crypto_alloc_skcipher( + alg->cra_name, 0, CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK); if (IS_ERR(ctx->sw_cipher)) { dev_warn(engine->dev, "failed to allocate fallback for %s\n", alg->cra_name); - ctx->sw_cipher = NULL; + return PTR_ERR(ctx->sw_cipher); } } ctx->generic.key_offs = spacc_alg->key_offs; @@ -1035,9 +1039,7 @@ static void spacc_ablk_cra_exit(struct crypto_tfm *tfm) { struct spacc_ablk_ctx *ctx = crypto_tfm_ctx(tfm); - if (ctx->sw_cipher) - crypto_free_ablkcipher(ctx->sw_cipher); - ctx->sw_cipher = NULL; + crypto_free_skcipher(ctx->sw_cipher); } static int spacc_ablk_encrypt(struct ablkcipher_request *req) -- cgit v1.2.3 From 2d20ce070d3b78f0974408ef648223967d0efb0a Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 29 Jun 2016 18:04:04 +0800 Subject: crypto: qce - Use skcipher for fallback This patch replaces use of the obsolete ablkcipher with skcipher. Signed-off-by: Herbert Xu --- drivers/crypto/qce/ablkcipher.c | 27 ++++++++++++++++----------- drivers/crypto/qce/cipher.h | 2 +- 2 files changed, 17 insertions(+), 12 deletions(-) (limited to 'drivers/crypto') diff --git a/drivers/crypto/qce/ablkcipher.c b/drivers/crypto/qce/ablkcipher.c index dbcbbe242bd6..b04b42f48366 100644 --- a/drivers/crypto/qce/ablkcipher.c +++ b/drivers/crypto/qce/ablkcipher.c @@ -15,8 +15,8 @@ #include #include #include -#include #include +#include #include "cipher.h" @@ -189,7 +189,7 @@ static int qce_ablkcipher_setkey(struct crypto_ablkcipher *ablk, const u8 *key, memcpy(ctx->enc_key, key, keylen); return 0; fallback: - ret = crypto_ablkcipher_setkey(ctx->fallback, key, keylen); + ret = crypto_skcipher_setkey(ctx->fallback, key, keylen); if (!ret) ctx->enc_keylen = keylen; return ret; @@ -212,10 +212,16 @@ static int qce_ablkcipher_crypt(struct ablkcipher_request *req, int encrypt) if (IS_AES(rctx->flags) && ctx->enc_keylen != AES_KEYSIZE_128 && ctx->enc_keylen != AES_KEYSIZE_256) { - ablkcipher_request_set_tfm(req, ctx->fallback); - ret = encrypt ? crypto_ablkcipher_encrypt(req) : - crypto_ablkcipher_decrypt(req); - ablkcipher_request_set_tfm(req, __crypto_ablkcipher_cast(tfm)); + SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback); + + skcipher_request_set_tfm(subreq, ctx->fallback); + skcipher_request_set_callback(subreq, req->base.flags, + NULL, NULL); + skcipher_request_set_crypt(subreq, req->src, req->dst, + req->nbytes, req->info); + ret = encrypt ? crypto_skcipher_encrypt(subreq) : + crypto_skcipher_decrypt(subreq); + skcipher_request_zero(subreq); return ret; } @@ -239,10 +245,9 @@ static int qce_ablkcipher_init(struct crypto_tfm *tfm) memset(ctx, 0, sizeof(*ctx)); tfm->crt_ablkcipher.reqsize = sizeof(struct qce_cipher_reqctx); - ctx->fallback = crypto_alloc_ablkcipher(crypto_tfm_alg_name(tfm), - CRYPTO_ALG_TYPE_ABLKCIPHER, - CRYPTO_ALG_ASYNC | - CRYPTO_ALG_NEED_FALLBACK); + ctx->fallback = crypto_alloc_skcipher(crypto_tfm_alg_name(tfm), 0, + CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK); if (IS_ERR(ctx->fallback)) return PTR_ERR(ctx->fallback); @@ -253,7 +258,7 @@ static void qce_ablkcipher_exit(struct crypto_tfm *tfm) { struct qce_cipher_ctx *ctx = crypto_tfm_ctx(tfm); - crypto_free_ablkcipher(ctx->fallback); + crypto_free_skcipher(ctx->fallback); } struct qce_ablkcipher_def { diff --git a/drivers/crypto/qce/cipher.h b/drivers/crypto/qce/cipher.h index 5c6a5f8633e5..2b0278bb6e92 100644 --- a/drivers/crypto/qce/cipher.h +++ b/drivers/crypto/qce/cipher.h @@ -22,7 +22,7 @@ struct qce_cipher_ctx { u8 enc_key[QCE_MAX_KEY_SIZE]; unsigned int enc_keylen; - struct crypto_ablkcipher *fallback; + struct crypto_skcipher *fallback; }; /** -- cgit v1.2.3 From 678adecd117f953cd21088064d95ceffac79a2a6 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 29 Jun 2016 18:04:05 +0800 Subject: crypto: sahara - Use skcipher for fallback This patch replaces use of the obsolete ablkcipher with skcipher. It also removes shash_fallback which is totally unused. Signed-off-by: Herbert Xu --- drivers/crypto/sahara.c | 112 +++++++++++++++++++++--------------------------- 1 file changed, 50 insertions(+), 62 deletions(-) (limited to 'drivers/crypto') diff --git a/drivers/crypto/sahara.c b/drivers/crypto/sahara.c index c3f3d89e4831..0c49956ee0ce 100644 --- a/drivers/crypto/sahara.c +++ b/drivers/crypto/sahara.c @@ -14,10 +14,9 @@ * Based on omap-aes.c and tegra-aes.c */ -#include #include -#include #include +#include #include #include @@ -150,10 +149,7 @@ struct sahara_ctx { /* AES-specific context */ int keylen; u8 key[AES_KEYSIZE_128]; - struct crypto_ablkcipher *fallback; - - /* SHA-specific context */ - struct crypto_shash *shash_fallback; + struct crypto_skcipher *fallback; }; struct sahara_aes_reqctx { @@ -620,25 +616,21 @@ static int sahara_aes_setkey(struct crypto_ablkcipher *tfm, const u8 *key, return 0; } - if (keylen != AES_KEYSIZE_128 && - keylen != AES_KEYSIZE_192 && keylen != AES_KEYSIZE_256) + if (keylen != AES_KEYSIZE_192 && keylen != AES_KEYSIZE_256) return -EINVAL; /* * The requested key size is not supported by HW, do a fallback. */ - ctx->fallback->base.crt_flags &= ~CRYPTO_TFM_REQ_MASK; - ctx->fallback->base.crt_flags |= - (tfm->base.crt_flags & CRYPTO_TFM_REQ_MASK); + crypto_skcipher_clear_flags(ctx->fallback, CRYPTO_TFM_REQ_MASK); + crypto_skcipher_set_flags(ctx->fallback, tfm->base.crt_flags & + CRYPTO_TFM_REQ_MASK); - ret = crypto_ablkcipher_setkey(ctx->fallback, key, keylen); - if (ret) { - struct crypto_tfm *tfm_aux = crypto_ablkcipher_tfm(tfm); + ret = crypto_skcipher_setkey(ctx->fallback, key, keylen); - tfm_aux->crt_flags &= ~CRYPTO_TFM_RES_MASK; - tfm_aux->crt_flags |= - (ctx->fallback->base.crt_flags & CRYPTO_TFM_RES_MASK); - } + tfm->base.crt_flags &= ~CRYPTO_TFM_RES_MASK; + tfm->base.crt_flags |= crypto_skcipher_get_flags(ctx->fallback) & + CRYPTO_TFM_RES_MASK; return ret; } @@ -670,16 +662,20 @@ static int sahara_aes_crypt(struct ablkcipher_request *req, unsigned long mode) static int sahara_aes_ecb_encrypt(struct ablkcipher_request *req) { - struct crypto_tfm *tfm = - crypto_ablkcipher_tfm(crypto_ablkcipher_reqtfm(req)); struct sahara_ctx *ctx = crypto_ablkcipher_ctx( crypto_ablkcipher_reqtfm(req)); int err; if (unlikely(ctx->keylen != AES_KEYSIZE_128)) { - ablkcipher_request_set_tfm(req, ctx->fallback); - err = crypto_ablkcipher_encrypt(req); - ablkcipher_request_set_tfm(req, __crypto_ablkcipher_cast(tfm)); + SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback); + + skcipher_request_set_tfm(subreq, ctx->fallback); + skcipher_request_set_callback(subreq, req->base.flags, + NULL, NULL); + skcipher_request_set_crypt(subreq, req->src, req->dst, + req->nbytes, req->info); + err = crypto_skcipher_encrypt(subreq); + skcipher_request_zero(subreq); return err; } @@ -688,16 +684,20 @@ static int sahara_aes_ecb_encrypt(struct ablkcipher_request *req) static int sahara_aes_ecb_decrypt(struct ablkcipher_request *req) { - struct crypto_tfm *tfm = - crypto_ablkcipher_tfm(crypto_ablkcipher_reqtfm(req)); struct sahara_ctx *ctx = crypto_ablkcipher_ctx( crypto_ablkcipher_reqtfm(req)); int err; if (unlikely(ctx->keylen != AES_KEYSIZE_128)) { - ablkcipher_request_set_tfm(req, ctx->fallback); - err = crypto_ablkcipher_decrypt(req); - ablkcipher_request_set_tfm(req, __crypto_ablkcipher_cast(tfm)); + SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback); + + skcipher_request_set_tfm(subreq, ctx->fallback); + skcipher_request_set_callback(subreq, req->base.flags, + NULL, NULL); + skcipher_request_set_crypt(subreq, req->src, req->dst, + req->nbytes, req->info); + err = crypto_skcipher_decrypt(subreq); + skcipher_request_zero(subreq); return err; } @@ -706,16 +706,20 @@ static int sahara_aes_ecb_decrypt(struct ablkcipher_request *req) static int sahara_aes_cbc_encrypt(struct ablkcipher_request *req) { - struct crypto_tfm *tfm = - crypto_ablkcipher_tfm(crypto_ablkcipher_reqtfm(req)); struct sahara_ctx *ctx = crypto_ablkcipher_ctx( crypto_ablkcipher_reqtfm(req)); int err; if (unlikely(ctx->keylen != AES_KEYSIZE_128)) { - ablkcipher_request_set_tfm(req, ctx->fallback); - err = crypto_ablkcipher_encrypt(req); - ablkcipher_request_set_tfm(req, __crypto_ablkcipher_cast(tfm)); + SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback); + + skcipher_request_set_tfm(subreq, ctx->fallback); + skcipher_request_set_callback(subreq, req->base.flags, + NULL, NULL); + skcipher_request_set_crypt(subreq, req->src, req->dst, + req->nbytes, req->info); + err = crypto_skcipher_encrypt(subreq); + skcipher_request_zero(subreq); return err; } @@ -724,16 +728,20 @@ static int sahara_aes_cbc_encrypt(struct ablkcipher_request *req) static int sahara_aes_cbc_decrypt(struct ablkcipher_request *req) { - struct crypto_tfm *tfm = - crypto_ablkcipher_tfm(crypto_ablkcipher_reqtfm(req)); struct sahara_ctx *ctx = crypto_ablkcipher_ctx( crypto_ablkcipher_reqtfm(req)); int err; if (unlikely(ctx->keylen != AES_KEYSIZE_128)) { - ablkcipher_request_set_tfm(req, ctx->fallback); - err = crypto_ablkcipher_decrypt(req); - ablkcipher_request_set_tfm(req, __crypto_ablkcipher_cast(tfm)); + SKCIPHER_REQUEST_ON_STACK(subreq, ctx->fallback); + + skcipher_request_set_tfm(subreq, ctx->fallback); + skcipher_request_set_callback(subreq, req->base.flags, + NULL, NULL); + skcipher_request_set_crypt(subreq, req->src, req->dst, + req->nbytes, req->info); + err = crypto_skcipher_decrypt(subreq); + skcipher_request_zero(subreq); return err; } @@ -745,8 +753,9 @@ static int sahara_aes_cra_init(struct crypto_tfm *tfm) const char *name = crypto_tfm_alg_name(tfm); struct sahara_ctx *ctx = crypto_tfm_ctx(tfm); - ctx->fallback = crypto_alloc_ablkcipher(name, 0, - CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK); + ctx->fallback = crypto_alloc_skcipher(name, 0, + CRYPTO_ALG_ASYNC | + CRYPTO_ALG_NEED_FALLBACK); if (IS_ERR(ctx->fallback)) { pr_err("Error allocating fallback algo %s\n", name); return PTR_ERR(ctx->fallback); @@ -761,9 +770,7 @@ static void sahara_aes_cra_exit(struct crypto_tfm *tfm) { struct sahara_ctx *ctx = crypto_tfm_ctx(tfm); - if (ctx->fallback) - crypto_free_ablkcipher(ctx->fallback); - ctx->fallback = NULL; + crypto_free_skcipher(ctx->fallback); } static u32 sahara_sha_init_hdr(struct sahara_dev *dev, @@ -1180,15 +1187,6 @@ static int sahara_sha_import(struct ahash_request *req, const void *in) static int sahara_sha_cra_init(struct crypto_tfm *tfm) { - const char *name = crypto_tfm_alg_name(tfm); - struct sahara_ctx *ctx = crypto_tfm_ctx(tfm); - - ctx->shash_fallback = crypto_alloc_shash(name, 0, - CRYPTO_ALG_NEED_FALLBACK); - if (IS_ERR(ctx->shash_fallback)) { - pr_err("Error allocating fallback algo %s\n", name); - return PTR_ERR(ctx->shash_fallback); - } crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm), sizeof(struct sahara_sha_reqctx) + SHA_BUFFER_LEN + SHA256_BLOCK_SIZE); @@ -1196,14 +1194,6 @@ static int sahara_sha_cra_init(struct crypto_tfm *tfm) return 0; } -static void sahara_sha_cra_exit(struct crypto_tfm *tfm) -{ - struct sahara_ctx *ctx = crypto_tfm_ctx(tfm); - - crypto_free_shash(ctx->shash_fallback); - ctx->shash_fallback = NULL; -} - static struct crypto_alg aes_algs[] = { { .cra_name = "ecb(aes)", @@ -1272,7 +1262,6 @@ static struct ahash_alg sha_v3_algs[] = { .cra_alignmask = 0, .cra_module = THIS_MODULE, .cra_init = sahara_sha_cra_init, - .cra_exit = sahara_sha_cra_exit, } }, }; @@ -1300,7 +1289,6 @@ static struct ahash_alg sha_v4_algs[] = { .cra_alignmask = 0, .cra_module = THIS_MODULE, .cra_init = sahara_sha_cra_init, - .cra_exit = sahara_sha_cra_exit, } }, }; -- cgit v1.2.3 From eb3547859d73629c888825d6b928f2d0dba5af41 Mon Sep 17 00:00:00 2001 From: Bin Liu Date: Thu, 30 Jun 2016 14:04:11 -0500 Subject: crypto: omap-sham - increase cra_proirity to 400 The arm-neon-sha implementations have cra_priority of 150...300, so increase omap-sham priority to 400 to ensure it is on top of any software alg. Signed-off-by: Bin Liu Signed-off-by: Herbert Xu --- drivers/crypto/omap-sham.c | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'drivers/crypto') diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c index ae6f841f81ff..7fe4eef12fe2 100644 --- a/drivers/crypto/omap-sham.c +++ b/drivers/crypto/omap-sham.c @@ -1334,7 +1334,7 @@ static struct ahash_alg algs_sha1_md5[] = { .halg.base = { .cra_name = "sha1", .cra_driver_name = "omap-sha1", - .cra_priority = 100, + .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_KERN_DRIVER_ONLY | CRYPTO_ALG_ASYNC | @@ -1357,7 +1357,7 @@ static struct ahash_alg algs_sha1_md5[] = { .halg.base = { .cra_name = "md5", .cra_driver_name = "omap-md5", - .cra_priority = 100, + .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_KERN_DRIVER_ONLY | CRYPTO_ALG_ASYNC | @@ -1381,7 +1381,7 @@ static struct ahash_alg algs_sha1_md5[] = { .halg.base = { .cra_name = "hmac(sha1)", .cra_driver_name = "omap-hmac-sha1", - .cra_priority = 100, + .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_KERN_DRIVER_ONLY | CRYPTO_ALG_ASYNC | @@ -1406,7 +1406,7 @@ static struct ahash_alg algs_sha1_md5[] = { .halg.base = { .cra_name = "hmac(md5)", .cra_driver_name = "omap-hmac-md5", - .cra_priority = 100, + .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_KERN_DRIVER_ONLY | CRYPTO_ALG_ASYNC | @@ -1434,7 +1434,7 @@ static struct ahash_alg algs_sha224_sha256[] = { .halg.base = { .cra_name = "sha224", .cra_driver_name = "omap-sha224", - .cra_priority = 100, + .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, @@ -1456,7 +1456,7 @@ static struct ahash_alg algs_sha224_sha256[] = { .halg.base = { .cra_name = "sha256", .cra_driver_name = "omap-sha256", - .cra_priority = 100, + .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, @@ -1479,7 +1479,7 @@ static struct ahash_alg algs_sha224_sha256[] = { .halg.base = { .cra_name = "hmac(sha224)", .cra_driver_name = "omap-hmac-sha224", - .cra_priority = 100, + .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, @@ -1503,7 +1503,7 @@ static struct ahash_alg algs_sha224_sha256[] = { .halg.base = { .cra_name = "hmac(sha256)", .cra_driver_name = "omap-hmac-sha256", - .cra_priority = 100, + .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, @@ -1529,7 +1529,7 @@ static struct ahash_alg algs_sha384_sha512[] = { .halg.base = { .cra_name = "sha384", .cra_driver_name = "omap-sha384", - .cra_priority = 100, + .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, @@ -1551,7 +1551,7 @@ static struct ahash_alg algs_sha384_sha512[] = { .halg.base = { .cra_name = "sha512", .cra_driver_name = "omap-sha512", - .cra_priority = 100, + .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, @@ -1574,7 +1574,7 @@ static struct ahash_alg algs_sha384_sha512[] = { .halg.base = { .cra_name = "hmac(sha384)", .cra_driver_name = "omap-hmac-sha384", - .cra_priority = 100, + .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, @@ -1598,7 +1598,7 @@ static struct ahash_alg algs_sha384_sha512[] = { .halg.base = { .cra_name = "hmac(sha512)", .cra_driver_name = "omap-hmac-sha512", - .cra_priority = 100, + .cra_priority = 400, .cra_flags = CRYPTO_ALG_TYPE_AHASH | CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK, -- cgit v1.2.3 From 6889621fd2317f52fd2c5ef1178128156f39fa94 Mon Sep 17 00:00:00 2001 From: Salvatore Benedetto Date: Mon, 4 Jul 2016 10:49:28 +0100 Subject: crypto: qat - Switch to new rsa_helper functions Drop all asn1 related code and use the new rsa_helper functions rsa_parse_[pub|priv]_key for parsing the key Signed-off-by: Salvatore Benedetto Signed-off-by: Herbert Xu --- drivers/crypto/qat/Kconfig | 2 +- drivers/crypto/qat/qat_common/Makefile | 10 ----- drivers/crypto/qat/qat_common/qat_asym_algs.c | 49 +++++++++-------------- drivers/crypto/qat/qat_common/qat_rsaprivkey.asn1 | 11 ----- drivers/crypto/qat/qat_common/qat_rsapubkey.asn1 | 4 -- 5 files changed, 21 insertions(+), 55 deletions(-) delete mode 100644 drivers/crypto/qat/qat_common/qat_rsaprivkey.asn1 delete mode 100644 drivers/crypto/qat/qat_common/qat_rsapubkey.asn1 (limited to 'drivers/crypto') diff --git a/drivers/crypto/qat/Kconfig b/drivers/crypto/qat/Kconfig index 85b44e577684..571d04dda415 100644 --- a/drivers/crypto/qat/Kconfig +++ b/drivers/crypto/qat/Kconfig @@ -5,11 +5,11 @@ config CRYPTO_DEV_QAT select CRYPTO_BLKCIPHER select CRYPTO_AKCIPHER select CRYPTO_HMAC + select CRYPTO_RSA select CRYPTO_SHA1 select CRYPTO_SHA256 select CRYPTO_SHA512 select FW_LOADER - select ASN1 config CRYPTO_DEV_QAT_DH895xCC tristate "Support for Intel(R) DH895xCC" diff --git a/drivers/crypto/qat/qat_common/Makefile b/drivers/crypto/qat/qat_common/Makefile index 6d74b91f2152..92fb6ffdc062 100644 --- a/drivers/crypto/qat/qat_common/Makefile +++ b/drivers/crypto/qat/qat_common/Makefile @@ -1,11 +1,3 @@ -$(obj)/qat_rsapubkey-asn1.o: $(obj)/qat_rsapubkey-asn1.c \ - $(obj)/qat_rsapubkey-asn1.h -$(obj)/qat_rsaprivkey-asn1.o: $(obj)/qat_rsaprivkey-asn1.c \ - $(obj)/qat_rsaprivkey-asn1.h - -clean-files += qat_rsapubkey-asn1.c qat_rsapubkey-asn1.h -clean-files += qat_rsaprivkey-asn1.c qat_rsaprivkey-asn1.h - obj-$(CONFIG_CRYPTO_DEV_QAT) += intel_qat.o intel_qat-objs := adf_cfg.o \ adf_isr.o \ @@ -19,8 +11,6 @@ intel_qat-objs := adf_cfg.o \ adf_hw_arbiter.o \ qat_crypto.o \ qat_algs.o \ - qat_rsapubkey-asn1.o \ - qat_rsaprivkey-asn1.o \ qat_asym_algs.o \ qat_uclo.o \ qat_hal.o diff --git a/drivers/crypto/qat/qat_common/qat_asym_algs.c b/drivers/crypto/qat/qat_common/qat_asym_algs.c index 05f49d4f94b2..04b0ef8cfaa1 100644 --- a/drivers/crypto/qat/qat_common/qat_asym_algs.c +++ b/drivers/crypto/qat/qat_common/qat_asym_algs.c @@ -52,8 +52,6 @@ #include #include #include -#include "qat_rsapubkey-asn1.h" -#include "qat_rsaprivkey-asn1.h" #include "icp_qat_fw_pke.h" #include "adf_accel_devices.h" #include "adf_transport.h" @@ -502,10 +500,8 @@ unmap_src: return ret; } -int qat_rsa_get_n(void *context, size_t hdrlen, unsigned char tag, - const void *value, size_t vlen) +int qat_rsa_set_n(struct qat_rsa_ctx *ctx, const char *value, size_t vlen) { - struct qat_rsa_ctx *ctx = context; struct qat_crypto_instance *inst = ctx->inst; struct device *dev = &GET_DEV(inst->accel_dev); const char *ptr = value; @@ -518,11 +514,6 @@ int qat_rsa_get_n(void *context, size_t hdrlen, unsigned char tag, ctx->key_sz = vlen; ret = -EINVAL; - /* In FIPS mode only allow key size 2K & 3K */ - if (fips_enabled && (ctx->key_sz != 256 && ctx->key_sz != 384)) { - pr_err("QAT: RSA: key size not allowed in FIPS mode\n"); - goto err; - } /* invalid key size provided */ if (!qat_rsa_enc_fn_id(ctx->key_sz)) goto err; @@ -540,10 +531,8 @@ err: return ret; } -int qat_rsa_get_e(void *context, size_t hdrlen, unsigned char tag, - const void *value, size_t vlen) +int qat_rsa_set_e(struct qat_rsa_ctx *ctx, const char *value, size_t vlen) { - struct qat_rsa_ctx *ctx = context; struct qat_crypto_instance *inst = ctx->inst; struct device *dev = &GET_DEV(inst->accel_dev); const char *ptr = value; @@ -559,18 +548,15 @@ int qat_rsa_get_e(void *context, size_t hdrlen, unsigned char tag, } ctx->e = dma_zalloc_coherent(dev, ctx->key_sz, &ctx->dma_e, GFP_KERNEL); - if (!ctx->e) { - ctx->e = NULL; + if (!ctx->e) return -ENOMEM; - } + memcpy(ctx->e + (ctx->key_sz - vlen), ptr, vlen); return 0; } -int qat_rsa_get_d(void *context, size_t hdrlen, unsigned char tag, - const void *value, size_t vlen) +int qat_rsa_set_d(struct qat_rsa_ctx *ctx, const char *value, size_t vlen) { - struct qat_rsa_ctx *ctx = context; struct qat_crypto_instance *inst = ctx->inst; struct device *dev = &GET_DEV(inst->accel_dev); const char *ptr = value; @@ -585,12 +571,6 @@ int qat_rsa_get_d(void *context, size_t hdrlen, unsigned char tag, if (!ctx->key_sz || !vlen || vlen > ctx->key_sz) goto err; - /* In FIPS mode only allow key size 2K & 3K */ - if (fips_enabled && (vlen != 256 && vlen != 384)) { - pr_err("QAT: RSA: key size not allowed in FIPS mode\n"); - goto err; - } - ret = -ENOMEM; ctx->d = dma_zalloc_coherent(dev, ctx->key_sz, &ctx->dma_d, GFP_KERNEL); if (!ctx->d) @@ -608,6 +588,7 @@ static int qat_rsa_setkey(struct crypto_akcipher *tfm, const void *key, { struct qat_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); struct device *dev = &GET_DEV(ctx->inst->accel_dev); + struct rsa_key rsa_key; int ret; /* Free the old key if any */ @@ -625,13 +606,23 @@ static int qat_rsa_setkey(struct crypto_akcipher *tfm, const void *key, ctx->d = NULL; if (private) - ret = asn1_ber_decoder(&qat_rsaprivkey_decoder, ctx, key, - keylen); + ret = rsa_parse_priv_key(&rsa_key, key, keylen); else - ret = asn1_ber_decoder(&qat_rsapubkey_decoder, ctx, key, - keylen); + ret = rsa_parse_pub_key(&rsa_key, key, keylen); + if (ret < 0) + goto free; + + ret = qat_rsa_set_n(ctx, rsa_key.n, rsa_key.n_sz); if (ret < 0) goto free; + ret = qat_rsa_set_e(ctx, rsa_key.e, rsa_key.e_sz); + if (ret < 0) + goto free; + if (private) { + ret = qat_rsa_set_d(ctx, rsa_key.d, rsa_key.d_sz); + if (ret < 0) + goto free; + } if (!ctx->n || !ctx->e) { /* invalid key provided */ diff --git a/drivers/crypto/qat/qat_common/qat_rsaprivkey.asn1 b/drivers/crypto/qat/qat_common/qat_rsaprivkey.asn1 deleted file mode 100644 index f0066adb79b8..000000000000 --- a/drivers/crypto/qat/qat_common/qat_rsaprivkey.asn1 +++ /dev/null @@ -1,11 +0,0 @@ -RsaPrivKey ::= SEQUENCE { - version INTEGER, - n INTEGER ({ qat_rsa_get_n }), - e INTEGER ({ qat_rsa_get_e }), - d INTEGER ({ qat_rsa_get_d }), - prime1 INTEGER, - prime2 INTEGER, - exponent1 INTEGER, - exponent2 INTEGER, - coefficient INTEGER -} diff --git a/drivers/crypto/qat/qat_common/qat_rsapubkey.asn1 b/drivers/crypto/qat/qat_common/qat_rsapubkey.asn1 deleted file mode 100644 index bd667b31a21a..000000000000 --- a/drivers/crypto/qat/qat_common/qat_rsapubkey.asn1 +++ /dev/null @@ -1,4 +0,0 @@ -RsaPubKey ::= SEQUENCE { - n INTEGER ({ qat_rsa_get_n }), - e INTEGER ({ qat_rsa_get_e }) -} -- cgit v1.2.3 From 8c419778ab57e497b5de1352aa39dbe2efb3ed54 Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Mon, 4 Jul 2016 13:12:08 +0300 Subject: crypto: caam - add support for RSA algorithm Add RSA support to caam driver. Initial author is Yashpal Dutta . Signed-off-by: Tudor Ambarus Signed-off-by: Herbert Xu --- drivers/crypto/caam/Kconfig | 12 + drivers/crypto/caam/Makefile | 2 + drivers/crypto/caam/caampkc.c | 607 ++++++++++++++++++++++++++++++++++++++ drivers/crypto/caam/caampkc.h | 70 +++++ drivers/crypto/caam/compat.h | 3 + drivers/crypto/caam/desc.h | 2 + drivers/crypto/caam/desc_constr.h | 7 + drivers/crypto/caam/pdb.h | 51 +++- drivers/crypto/caam/pkc_desc.c | 36 +++ 9 files changed, 789 insertions(+), 1 deletion(-) create mode 100644 drivers/crypto/caam/caampkc.c create mode 100644 drivers/crypto/caam/caampkc.h create mode 100644 drivers/crypto/caam/pkc_desc.c (limited to 'drivers/crypto') diff --git a/drivers/crypto/caam/Kconfig b/drivers/crypto/caam/Kconfig index ff54c42e6e51..64bf3024b680 100644 --- a/drivers/crypto/caam/Kconfig +++ b/drivers/crypto/caam/Kconfig @@ -99,6 +99,18 @@ config CRYPTO_DEV_FSL_CAAM_AHASH_API To compile this as a module, choose M here: the module will be called caamhash. +config CRYPTO_DEV_FSL_CAAM_PKC_API + tristate "Register public key cryptography implementations with Crypto API" + depends on CRYPTO_DEV_FSL_CAAM && CRYPTO_DEV_FSL_CAAM_JR + default y + select CRYPTO_RSA + help + Selecting this will allow SEC Public key support for RSA. + Supported cryptographic primitives: encryption, decryption, + signature and verification. + To compile this as a module, choose M here: the module + will be called caam_pkc. + config CRYPTO_DEV_FSL_CAAM_RNG_API tristate "Register caam device for hwrng API" depends on CRYPTO_DEV_FSL_CAAM && CRYPTO_DEV_FSL_CAAM_JR diff --git a/drivers/crypto/caam/Makefile b/drivers/crypto/caam/Makefile index 3904700ef110..08bf5515ae8a 100644 --- a/drivers/crypto/caam/Makefile +++ b/drivers/crypto/caam/Makefile @@ -10,6 +10,8 @@ obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_JR) += caam_jr.o obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_CRYPTO_API) += caamalg.o obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_AHASH_API) += caamhash.o obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_RNG_API) += caamrng.o +obj-$(CONFIG_CRYPTO_DEV_FSL_CAAM_PKC_API) += caam_pkc.o caam-objs := ctrl.o caam_jr-objs := jr.o key_gen.o error.o +caam_pkc-y := caampkc.o pkc_desc.o diff --git a/drivers/crypto/caam/caampkc.c b/drivers/crypto/caam/caampkc.c new file mode 100644 index 000000000000..851015e652b8 --- /dev/null +++ b/drivers/crypto/caam/caampkc.c @@ -0,0 +1,607 @@ +/* + * caam - Freescale FSL CAAM support for Public Key Cryptography + * + * Copyright 2016 Freescale Semiconductor, Inc. + * + * There is no Shared Descriptor for PKC so that the Job Descriptor must carry + * all the desired key parameters, input and output pointers. + */ +#include "compat.h" +#include "regs.h" +#include "intern.h" +#include "jr.h" +#include "error.h" +#include "desc_constr.h" +#include "sg_sw_sec4.h" +#include "caampkc.h" + +#define DESC_RSA_PUB_LEN (2 * CAAM_CMD_SZ + sizeof(struct rsa_pub_pdb)) +#define DESC_RSA_PRIV_F1_LEN (2 * CAAM_CMD_SZ + \ + sizeof(struct rsa_priv_f1_pdb)) + +static void rsa_io_unmap(struct device *dev, struct rsa_edesc *edesc, + struct akcipher_request *req) +{ + dma_unmap_sg(dev, req->dst, edesc->dst_nents, DMA_FROM_DEVICE); + dma_unmap_sg(dev, req->src, edesc->src_nents, DMA_TO_DEVICE); + + if (edesc->sec4_sg_bytes) + dma_unmap_single(dev, edesc->sec4_sg_dma, edesc->sec4_sg_bytes, + DMA_TO_DEVICE); +} + +static void rsa_pub_unmap(struct device *dev, struct rsa_edesc *edesc, + struct akcipher_request *req) +{ + struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_key *key = &ctx->key; + struct rsa_pub_pdb *pdb = &edesc->pdb.pub; + + dma_unmap_single(dev, pdb->n_dma, key->n_sz, DMA_TO_DEVICE); + dma_unmap_single(dev, pdb->e_dma, key->e_sz, DMA_TO_DEVICE); +} + +static void rsa_priv_f1_unmap(struct device *dev, struct rsa_edesc *edesc, + struct akcipher_request *req) +{ + struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_key *key = &ctx->key; + struct rsa_priv_f1_pdb *pdb = &edesc->pdb.priv_f1; + + dma_unmap_single(dev, pdb->n_dma, key->n_sz, DMA_TO_DEVICE); + dma_unmap_single(dev, pdb->d_dma, key->d_sz, DMA_TO_DEVICE); +} + +/* RSA Job Completion handler */ +static void rsa_pub_done(struct device *dev, u32 *desc, u32 err, void *context) +{ + struct akcipher_request *req = context; + struct rsa_edesc *edesc; + + if (err) + caam_jr_strstatus(dev, err); + + edesc = container_of(desc, struct rsa_edesc, hw_desc[0]); + + rsa_pub_unmap(dev, edesc, req); + rsa_io_unmap(dev, edesc, req); + kfree(edesc); + + akcipher_request_complete(req, err); +} + +static void rsa_priv_f1_done(struct device *dev, u32 *desc, u32 err, + void *context) +{ + struct akcipher_request *req = context; + struct rsa_edesc *edesc; + + if (err) + caam_jr_strstatus(dev, err); + + edesc = container_of(desc, struct rsa_edesc, hw_desc[0]); + + rsa_priv_f1_unmap(dev, edesc, req); + rsa_io_unmap(dev, edesc, req); + kfree(edesc); + + akcipher_request_complete(req, err); +} + +static struct rsa_edesc *rsa_edesc_alloc(struct akcipher_request *req, + size_t desclen) +{ + struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct device *dev = ctx->dev; + struct rsa_edesc *edesc; + gfp_t flags = (req->base.flags & (CRYPTO_TFM_REQ_MAY_BACKLOG | + CRYPTO_TFM_REQ_MAY_SLEEP)) ? GFP_KERNEL : GFP_ATOMIC; + int sgc; + int sec4_sg_index, sec4_sg_len = 0, sec4_sg_bytes; + int src_nents, dst_nents; + + src_nents = sg_nents_for_len(req->src, req->src_len); + dst_nents = sg_nents_for_len(req->dst, req->dst_len); + + if (src_nents > 1) + sec4_sg_len = src_nents; + if (dst_nents > 1) + sec4_sg_len += dst_nents; + + sec4_sg_bytes = sec4_sg_len * sizeof(struct sec4_sg_entry); + + /* allocate space for base edesc, hw desc commands and link tables */ + edesc = kzalloc(sizeof(*edesc) + desclen + sec4_sg_bytes, + GFP_DMA | flags); + if (!edesc) + return ERR_PTR(-ENOMEM); + + sgc = dma_map_sg(dev, req->src, src_nents, DMA_TO_DEVICE); + if (unlikely(!sgc)) { + dev_err(dev, "unable to map source\n"); + goto src_fail; + } + + sgc = dma_map_sg(dev, req->dst, dst_nents, DMA_FROM_DEVICE); + if (unlikely(!sgc)) { + dev_err(dev, "unable to map destination\n"); + goto dst_fail; + } + + edesc->sec4_sg = (void *)edesc + sizeof(*edesc) + desclen; + + sec4_sg_index = 0; + if (src_nents > 1) { + sg_to_sec4_sg_last(req->src, src_nents, edesc->sec4_sg, 0); + sec4_sg_index += src_nents; + } + if (dst_nents > 1) + sg_to_sec4_sg_last(req->dst, dst_nents, + edesc->sec4_sg + sec4_sg_index, 0); + + /* Save nents for later use in Job Descriptor */ + edesc->src_nents = src_nents; + edesc->dst_nents = dst_nents; + + if (!sec4_sg_bytes) + return edesc; + + edesc->sec4_sg_dma = dma_map_single(dev, edesc->sec4_sg, + sec4_sg_bytes, DMA_TO_DEVICE); + if (dma_mapping_error(dev, edesc->sec4_sg_dma)) { + dev_err(dev, "unable to map S/G table\n"); + goto sec4_sg_fail; + } + + edesc->sec4_sg_bytes = sec4_sg_bytes; + + return edesc; + +sec4_sg_fail: + dma_unmap_sg(dev, req->dst, dst_nents, DMA_FROM_DEVICE); +dst_fail: + dma_unmap_sg(dev, req->src, src_nents, DMA_TO_DEVICE); +src_fail: + kfree(edesc); + return ERR_PTR(-ENOMEM); +} + +static int set_rsa_pub_pdb(struct akcipher_request *req, + struct rsa_edesc *edesc) +{ + struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_key *key = &ctx->key; + struct device *dev = ctx->dev; + struct rsa_pub_pdb *pdb = &edesc->pdb.pub; + int sec4_sg_index = 0; + + pdb->n_dma = dma_map_single(dev, key->n, key->n_sz, DMA_TO_DEVICE); + if (dma_mapping_error(dev, pdb->n_dma)) { + dev_err(dev, "Unable to map RSA modulus memory\n"); + return -ENOMEM; + } + + pdb->e_dma = dma_map_single(dev, key->e, key->e_sz, DMA_TO_DEVICE); + if (dma_mapping_error(dev, pdb->e_dma)) { + dev_err(dev, "Unable to map RSA public exponent memory\n"); + dma_unmap_single(dev, pdb->n_dma, key->n_sz, DMA_TO_DEVICE); + return -ENOMEM; + } + + if (edesc->src_nents > 1) { + pdb->sgf |= RSA_PDB_SGF_F; + pdb->f_dma = edesc->sec4_sg_dma; + sec4_sg_index += edesc->src_nents; + } else { + pdb->f_dma = sg_dma_address(req->src); + } + + if (edesc->dst_nents > 1) { + pdb->sgf |= RSA_PDB_SGF_G; + pdb->g_dma = edesc->sec4_sg_dma + + sec4_sg_index * sizeof(struct sec4_sg_entry); + } else { + pdb->g_dma = sg_dma_address(req->dst); + } + + pdb->sgf |= (key->e_sz << RSA_PDB_E_SHIFT) | key->n_sz; + pdb->f_len = req->src_len; + + return 0; +} + +static int set_rsa_priv_f1_pdb(struct akcipher_request *req, + struct rsa_edesc *edesc) +{ + struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_key *key = &ctx->key; + struct device *dev = ctx->dev; + struct rsa_priv_f1_pdb *pdb = &edesc->pdb.priv_f1; + int sec4_sg_index = 0; + + pdb->n_dma = dma_map_single(dev, key->n, key->n_sz, DMA_TO_DEVICE); + if (dma_mapping_error(dev, pdb->n_dma)) { + dev_err(dev, "Unable to map modulus memory\n"); + return -ENOMEM; + } + + pdb->d_dma = dma_map_single(dev, key->d, key->d_sz, DMA_TO_DEVICE); + if (dma_mapping_error(dev, pdb->d_dma)) { + dev_err(dev, "Unable to map RSA private exponent memory\n"); + dma_unmap_single(dev, pdb->n_dma, key->n_sz, DMA_TO_DEVICE); + return -ENOMEM; + } + + if (edesc->src_nents > 1) { + pdb->sgf |= RSA_PRIV_PDB_SGF_G; + pdb->g_dma = edesc->sec4_sg_dma; + sec4_sg_index += edesc->src_nents; + } else { + pdb->g_dma = sg_dma_address(req->src); + } + + if (edesc->dst_nents > 1) { + pdb->sgf |= RSA_PRIV_PDB_SGF_F; + pdb->f_dma = edesc->sec4_sg_dma + + sec4_sg_index * sizeof(struct sec4_sg_entry); + } else { + pdb->f_dma = sg_dma_address(req->dst); + } + + pdb->sgf |= (key->d_sz << RSA_PDB_D_SHIFT) | key->n_sz; + + return 0; +} + +static int caam_rsa_enc(struct akcipher_request *req) +{ + struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_key *key = &ctx->key; + struct device *jrdev = ctx->dev; + struct rsa_edesc *edesc; + int ret; + + if (unlikely(!key->n || !key->e)) + return -EINVAL; + + if (req->dst_len < key->n_sz) { + req->dst_len = key->n_sz; + dev_err(jrdev, "Output buffer length less than parameter n\n"); + return -EOVERFLOW; + } + + /* Allocate extended descriptor */ + edesc = rsa_edesc_alloc(req, DESC_RSA_PUB_LEN); + if (IS_ERR(edesc)) + return PTR_ERR(edesc); + + /* Set RSA Encrypt Protocol Data Block */ + ret = set_rsa_pub_pdb(req, edesc); + if (ret) + goto init_fail; + + /* Initialize Job Descriptor */ + init_rsa_pub_desc(edesc->hw_desc, &edesc->pdb.pub); + + ret = caam_jr_enqueue(jrdev, edesc->hw_desc, rsa_pub_done, req); + if (!ret) + return -EINPROGRESS; + + rsa_pub_unmap(jrdev, edesc, req); + +init_fail: + rsa_io_unmap(jrdev, edesc, req); + kfree(edesc); + return ret; +} + +static int caam_rsa_dec(struct akcipher_request *req) +{ + struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_key *key = &ctx->key; + struct device *jrdev = ctx->dev; + struct rsa_edesc *edesc; + int ret; + + if (unlikely(!key->n || !key->d)) + return -EINVAL; + + if (req->dst_len < key->n_sz) { + req->dst_len = key->n_sz; + dev_err(jrdev, "Output buffer length less than parameter n\n"); + return -EOVERFLOW; + } + + /* Allocate extended descriptor */ + edesc = rsa_edesc_alloc(req, DESC_RSA_PRIV_F1_LEN); + if (IS_ERR(edesc)) + return PTR_ERR(edesc); + + /* Set RSA Decrypt Protocol Data Block - Private Key Form #1 */ + ret = set_rsa_priv_f1_pdb(req, edesc); + if (ret) + goto init_fail; + + /* Initialize Job Descriptor */ + init_rsa_priv_f1_desc(edesc->hw_desc, &edesc->pdb.priv_f1); + + ret = caam_jr_enqueue(jrdev, edesc->hw_desc, rsa_priv_f1_done, req); + if (!ret) + return -EINPROGRESS; + + rsa_priv_f1_unmap(jrdev, edesc, req); + +init_fail: + rsa_io_unmap(jrdev, edesc, req); + kfree(edesc); + return ret; +} + +static void caam_rsa_free_key(struct caam_rsa_key *key) +{ + kzfree(key->d); + kfree(key->e); + kfree(key->n); + key->d = NULL; + key->e = NULL; + key->n = NULL; + key->d_sz = 0; + key->e_sz = 0; + key->n_sz = 0; +} + +/** + * caam_read_raw_data - Read a raw byte stream as a positive integer. + * The function skips buffer's leading zeros, copies the remained data + * to a buffer allocated in the GFP_DMA | GFP_KERNEL zone and returns + * the address of the new buffer. + * + * @buf : The data to read + * @nbytes: The amount of data to read + */ +static inline u8 *caam_read_raw_data(const u8 *buf, size_t *nbytes) +{ + u8 *val; + + while (!*buf && *nbytes) { + buf++; + (*nbytes)--; + } + + val = kzalloc(*nbytes, GFP_DMA | GFP_KERNEL); + if (!val) + return NULL; + + memcpy(val, buf, *nbytes); + + return val; +} + +static int caam_rsa_check_key_length(unsigned int len) +{ + if (len > 4096) + return -EINVAL; + return 0; +} + +static int caam_rsa_set_pub_key(struct crypto_akcipher *tfm, const void *key, + unsigned int keylen) +{ + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct rsa_key raw_key = {0}; + struct caam_rsa_key *rsa_key = &ctx->key; + int ret; + + /* Free the old RSA key if any */ + caam_rsa_free_key(rsa_key); + + ret = rsa_parse_pub_key(&raw_key, key, keylen); + if (ret) + return ret; + + /* Copy key in DMA zone */ + rsa_key->e = kzalloc(raw_key.e_sz, GFP_DMA | GFP_KERNEL); + if (!rsa_key->e) + goto err; + + /* + * Skip leading zeros and copy the positive integer to a buffer + * allocated in the GFP_DMA | GFP_KERNEL zone. The decryption descriptor + * expects a positive integer for the RSA modulus and uses its length as + * decryption output length. + */ + rsa_key->n = caam_read_raw_data(raw_key.n, &raw_key.n_sz); + if (!rsa_key->n) + goto err; + + if (caam_rsa_check_key_length(raw_key.n_sz << 3)) { + caam_rsa_free_key(rsa_key); + return -EINVAL; + } + + rsa_key->e_sz = raw_key.e_sz; + rsa_key->n_sz = raw_key.n_sz; + + memcpy(rsa_key->e, raw_key.e, raw_key.e_sz); + + return 0; +err: + caam_rsa_free_key(rsa_key); + return -ENOMEM; +} + +static int caam_rsa_set_priv_key(struct crypto_akcipher *tfm, const void *key, + unsigned int keylen) +{ + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct rsa_key raw_key = {0}; + struct caam_rsa_key *rsa_key = &ctx->key; + int ret; + + /* Free the old RSA key if any */ + caam_rsa_free_key(rsa_key); + + ret = rsa_parse_priv_key(&raw_key, key, keylen); + if (ret) + return ret; + + /* Copy key in DMA zone */ + rsa_key->d = kzalloc(raw_key.d_sz, GFP_DMA | GFP_KERNEL); + if (!rsa_key->d) + goto err; + + rsa_key->e = kzalloc(raw_key.e_sz, GFP_DMA | GFP_KERNEL); + if (!rsa_key->e) + goto err; + + /* + * Skip leading zeros and copy the positive integer to a buffer + * allocated in the GFP_DMA | GFP_KERNEL zone. The decryption descriptor + * expects a positive integer for the RSA modulus and uses its length as + * decryption output length. + */ + rsa_key->n = caam_read_raw_data(raw_key.n, &raw_key.n_sz); + if (!rsa_key->n) + goto err; + + if (caam_rsa_check_key_length(raw_key.n_sz << 3)) { + caam_rsa_free_key(rsa_key); + return -EINVAL; + } + + rsa_key->d_sz = raw_key.d_sz; + rsa_key->e_sz = raw_key.e_sz; + rsa_key->n_sz = raw_key.n_sz; + + memcpy(rsa_key->d, raw_key.d, raw_key.d_sz); + memcpy(rsa_key->e, raw_key.e, raw_key.e_sz); + + return 0; + +err: + caam_rsa_free_key(rsa_key); + return -ENOMEM; +} + +static int caam_rsa_max_size(struct crypto_akcipher *tfm) +{ + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_key *key = &ctx->key; + + return (key->n) ? key->n_sz : -EINVAL; +} + +/* Per session pkc's driver context creation function */ +static int caam_rsa_init_tfm(struct crypto_akcipher *tfm) +{ + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + + ctx->dev = caam_jr_alloc(); + + if (IS_ERR(ctx->dev)) { + dev_err(ctx->dev, "Job Ring Device allocation for transform failed\n"); + return PTR_ERR(ctx->dev); + } + + return 0; +} + +/* Per session pkc's driver context cleanup function */ +static void caam_rsa_exit_tfm(struct crypto_akcipher *tfm) +{ + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_key *key = &ctx->key; + + caam_rsa_free_key(key); + caam_jr_free(ctx->dev); +} + +static struct akcipher_alg caam_rsa = { + .encrypt = caam_rsa_enc, + .decrypt = caam_rsa_dec, + .sign = caam_rsa_dec, + .verify = caam_rsa_enc, + .set_pub_key = caam_rsa_set_pub_key, + .set_priv_key = caam_rsa_set_priv_key, + .max_size = caam_rsa_max_size, + .init = caam_rsa_init_tfm, + .exit = caam_rsa_exit_tfm, + .base = { + .cra_name = "rsa", + .cra_driver_name = "rsa-caam", + .cra_priority = 3000, + .cra_module = THIS_MODULE, + .cra_ctxsize = sizeof(struct caam_rsa_ctx), + }, +}; + +/* Public Key Cryptography module initialization handler */ +static int __init caam_pkc_init(void) +{ + struct device_node *dev_node; + struct platform_device *pdev; + struct device *ctrldev; + struct caam_drv_private *priv; + u32 cha_inst, pk_inst; + int err; + + dev_node = of_find_compatible_node(NULL, NULL, "fsl,sec-v4.0"); + if (!dev_node) { + dev_node = of_find_compatible_node(NULL, NULL, "fsl,sec4.0"); + if (!dev_node) + return -ENODEV; + } + + pdev = of_find_device_by_node(dev_node); + if (!pdev) { + of_node_put(dev_node); + return -ENODEV; + } + + ctrldev = &pdev->dev; + priv = dev_get_drvdata(ctrldev); + of_node_put(dev_node); + + /* + * If priv is NULL, it's probably because the caam driver wasn't + * properly initialized (e.g. RNG4 init failed). Thus, bail out here. + */ + if (!priv) + return -ENODEV; + + /* Determine public key hardware accelerator presence. */ + cha_inst = rd_reg32(&priv->ctrl->perfmon.cha_num_ls); + pk_inst = (cha_inst & CHA_ID_LS_PK_MASK) >> CHA_ID_LS_PK_SHIFT; + + /* Do not register algorithms if PKHA is not present. */ + if (!pk_inst) + return -ENODEV; + + err = crypto_register_akcipher(&caam_rsa); + if (err) + dev_warn(ctrldev, "%s alg registration failed\n", + caam_rsa.base.cra_driver_name); + else + dev_info(ctrldev, "caam pkc algorithms registered in /proc/crypto\n"); + + return err; +} + +static void __exit caam_pkc_exit(void) +{ + crypto_unregister_akcipher(&caam_rsa); +} + +module_init(caam_pkc_init); +module_exit(caam_pkc_exit); + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_DESCRIPTION("FSL CAAM support for PKC functions of crypto API"); +MODULE_AUTHOR("Freescale Semiconductor"); diff --git a/drivers/crypto/caam/caampkc.h b/drivers/crypto/caam/caampkc.h new file mode 100644 index 000000000000..f595d159b112 --- /dev/null +++ b/drivers/crypto/caam/caampkc.h @@ -0,0 +1,70 @@ +/* + * caam - Freescale FSL CAAM support for Public Key Cryptography descriptors + * + * Copyright 2016 Freescale Semiconductor, Inc. + * + * There is no Shared Descriptor for PKC so that the Job Descriptor must carry + * all the desired key parameters, input and output pointers. + */ + +#ifndef _PKC_DESC_H_ +#define _PKC_DESC_H_ +#include "compat.h" +#include "pdb.h" + +/** + * caam_rsa_key - CAAM RSA key structure. Keys are allocated in DMA zone. + * @n : RSA modulus raw byte stream + * @e : RSA public exponent raw byte stream + * @d : RSA private exponent raw byte stream + * @n_sz : length in bytes of RSA modulus n + * @e_sz : length in bytes of RSA public exponent + * @d_sz : length in bytes of RSA private exponent + */ +struct caam_rsa_key { + u8 *n; + u8 *e; + u8 *d; + size_t n_sz; + size_t e_sz; + size_t d_sz; +}; + +/** + * caam_rsa_ctx - per session context. + * @key : RSA key in DMA zone + * @dev : device structure + */ +struct caam_rsa_ctx { + struct caam_rsa_key key; + struct device *dev; +}; + +/** + * rsa_edesc - s/w-extended rsa descriptor + * @src_nents : number of segments in input scatterlist + * @dst_nents : number of segments in output scatterlist + * @sec4_sg_bytes : length of h/w link table + * @sec4_sg_dma : dma address of h/w link table + * @sec4_sg : pointer to h/w link table + * @pdb : specific RSA Protocol Data Block (PDB) + * @hw_desc : descriptor followed by link tables if any + */ +struct rsa_edesc { + int src_nents; + int dst_nents; + int sec4_sg_bytes; + dma_addr_t sec4_sg_dma; + struct sec4_sg_entry *sec4_sg; + union { + struct rsa_pub_pdb pub; + struct rsa_priv_f1_pdb priv_f1; + } pdb; + u32 hw_desc[]; +}; + +/* Descriptor construction primitives. */ +void init_rsa_pub_desc(u32 *desc, struct rsa_pub_pdb *pdb); +void init_rsa_priv_f1_desc(u32 *desc, struct rsa_priv_f1_pdb *pdb); + +#endif diff --git a/drivers/crypto/caam/compat.h b/drivers/crypto/caam/compat.h index b6955ecdfb3f..7149cd2492e0 100644 --- a/drivers/crypto/caam/compat.h +++ b/drivers/crypto/caam/compat.h @@ -35,8 +35,11 @@ #include #include #include +#include #include #include #include +#include +#include #endif /* !defined(CAAM_COMPAT_H) */ diff --git a/drivers/crypto/caam/desc.h b/drivers/crypto/caam/desc.h index d8d5584b600b..26427c11ad87 100644 --- a/drivers/crypto/caam/desc.h +++ b/drivers/crypto/caam/desc.h @@ -453,6 +453,8 @@ struct sec4_sg_entry { #define OP_PCLID_PUBLICKEYPAIR (0x14 << OP_PCLID_SHIFT) #define OP_PCLID_DSASIGN (0x15 << OP_PCLID_SHIFT) #define OP_PCLID_DSAVERIFY (0x16 << OP_PCLID_SHIFT) +#define OP_PCLID_RSAENC_PUBKEY (0x18 << OP_PCLID_SHIFT) +#define OP_PCLID_RSADEC_PRVKEY (0x19 << OP_PCLID_SHIFT) /* Assuming OP_TYPE = OP_TYPE_DECAP_PROTOCOL/ENCAP_PROTOCOL */ #define OP_PCLID_IPSEC (0x01 << OP_PCLID_SHIFT) diff --git a/drivers/crypto/caam/desc_constr.h b/drivers/crypto/caam/desc_constr.h index ae3aef6e9fee..d3869b95e7b1 100644 --- a/drivers/crypto/caam/desc_constr.h +++ b/drivers/crypto/caam/desc_constr.h @@ -77,6 +77,13 @@ static inline void init_job_desc(u32 *desc, u32 options) init_desc(desc, CMD_DESC_HDR | options); } +static inline void init_job_desc_pdb(u32 *desc, u32 options, size_t pdb_bytes) +{ + u32 pdb_len = (pdb_bytes + CAAM_CMD_SZ - 1) / CAAM_CMD_SZ; + + init_job_desc(desc, (((pdb_len + 1) << HDR_START_IDX_SHIFT)) | options); +} + static inline void append_ptr(u32 *desc, dma_addr_t ptr) { dma_addr_t *offset = (dma_addr_t *)desc_end(desc); diff --git a/drivers/crypto/caam/pdb.h b/drivers/crypto/caam/pdb.h index d383573d3dd4..aaa00dd1c601 100644 --- a/drivers/crypto/caam/pdb.h +++ b/drivers/crypto/caam/pdb.h @@ -1,12 +1,13 @@ /* * CAAM Protocol Data Block (PDB) definition header file * - * Copyright 2008-2012 Freescale Semiconductor, Inc. + * Copyright 2008-2016 Freescale Semiconductor, Inc. * */ #ifndef CAAM_PDB_H #define CAAM_PDB_H +#include "compat.h" /* * PDB- IPSec ESP Header Modification Options @@ -476,4 +477,52 @@ struct dsa_verify_pdb { u8 *ab; /* only used if ECC processing */ }; +/* RSA Protocol Data Block */ +#define RSA_PDB_SGF_SHIFT 28 +#define RSA_PDB_E_SHIFT 12 +#define RSA_PDB_E_MASK (0xFFF << RSA_PDB_E_SHIFT) +#define RSA_PDB_D_SHIFT 12 +#define RSA_PDB_D_MASK (0xFFF << RSA_PDB_D_SHIFT) + +#define RSA_PDB_SGF_F (0x8 << RSA_PDB_SGF_SHIFT) +#define RSA_PDB_SGF_G (0x4 << RSA_PDB_SGF_SHIFT) +#define RSA_PRIV_PDB_SGF_F (0x4 << RSA_PDB_SGF_SHIFT) +#define RSA_PRIV_PDB_SGF_G (0x8 << RSA_PDB_SGF_SHIFT) + +#define RSA_PRIV_KEY_FRM_1 0 + +/** + * RSA Encrypt Protocol Data Block + * @sgf: scatter-gather field + * @f_dma: dma address of input data + * @g_dma: dma address of encrypted output data + * @n_dma: dma address of RSA modulus + * @e_dma: dma address of RSA public exponent + * @f_len: length in octets of the input data + */ +struct rsa_pub_pdb { + u32 sgf; + dma_addr_t f_dma; + dma_addr_t g_dma; + dma_addr_t n_dma; + dma_addr_t e_dma; + u32 f_len; +} __packed; + +/** + * RSA Decrypt PDB - Private Key Form #1 + * @sgf: scatter-gather field + * @g_dma: dma address of encrypted input data + * @f_dma: dma address of output data + * @n_dma: dma address of RSA modulus + * @d_dma: dma address of RSA private exponent + */ +struct rsa_priv_f1_pdb { + u32 sgf; + dma_addr_t g_dma; + dma_addr_t f_dma; + dma_addr_t n_dma; + dma_addr_t d_dma; +} __packed; + #endif diff --git a/drivers/crypto/caam/pkc_desc.c b/drivers/crypto/caam/pkc_desc.c new file mode 100644 index 000000000000..4e4183e615ea --- /dev/null +++ b/drivers/crypto/caam/pkc_desc.c @@ -0,0 +1,36 @@ +/* + * caam - Freescale FSL CAAM support for Public Key Cryptography descriptors + * + * Copyright 2016 Freescale Semiconductor, Inc. + * + * There is no Shared Descriptor for PKC so that the Job Descriptor must carry + * all the desired key parameters, input and output pointers. + */ +#include "caampkc.h" +#include "desc_constr.h" + +/* Descriptor for RSA Public operation */ +void init_rsa_pub_desc(u32 *desc, struct rsa_pub_pdb *pdb) +{ + init_job_desc_pdb(desc, 0, sizeof(*pdb)); + append_cmd(desc, pdb->sgf); + append_ptr(desc, pdb->f_dma); + append_ptr(desc, pdb->g_dma); + append_ptr(desc, pdb->n_dma); + append_ptr(desc, pdb->e_dma); + append_cmd(desc, pdb->f_len); + append_operation(desc, OP_TYPE_UNI_PROTOCOL | OP_PCLID_RSAENC_PUBKEY); +} + +/* Descriptor for RSA Private operation - Private Key Form #1 */ +void init_rsa_priv_f1_desc(u32 *desc, struct rsa_priv_f1_pdb *pdb) +{ + init_job_desc_pdb(desc, 0, sizeof(*pdb)); + append_cmd(desc, pdb->sgf); + append_ptr(desc, pdb->g_dma); + append_ptr(desc, pdb->f_dma); + append_ptr(desc, pdb->n_dma); + append_ptr(desc, pdb->d_dma); + append_operation(desc, OP_TYPE_UNI_PROTOCOL | OP_PCLID_RSADEC_PRVKEY | + RSA_PRIV_KEY_FRM_1); +} -- cgit v1.2.3 From 8d63a6d53b2afaf7b48e89a03bb94c3246f33204 Mon Sep 17 00:00:00 2001 From: Amitoj Kaur Chawla Date: Mon, 4 Jul 2016 18:30:21 +0530 Subject: crypto: bfin_crc - Simplify use of devm_ioremap_resource Remove unneeded error handling on the result of a call to platform_get_resource when the value is passed to devm_ioremap_resource. The Coccinelle semantic patch that makes this change is as follows: // @@ expression pdev,res,n,e,e1; expression ret != 0; identifier l; @@ - res = platform_get_resource(pdev, IORESOURCE_MEM, n); ... when != res - if (res == NULL) { ... \(goto l;\|return ret;\) } ... when != res + res = platform_get_resource(pdev, IORESOURCE_MEM, n); e = devm_ioremap_resource(e1, res); // Signed-off-by: Amitoj Kaur Chawla Signed-off-by: Herbert Xu --- drivers/crypto/bfin_crc.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'drivers/crypto') diff --git a/drivers/crypto/bfin_crc.c b/drivers/crypto/bfin_crc.c index 95b73968cf72..10db7df366c8 100644 --- a/drivers/crypto/bfin_crc.c +++ b/drivers/crypto/bfin_crc.c @@ -588,11 +588,6 @@ static int bfin_crypto_crc_probe(struct platform_device *pdev) crypto_init_queue(&crc->queue, CRC_CCRYPTO_QUEUE_LENGTH); res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (res == NULL) { - dev_err(&pdev->dev, "Cannot get IORESOURCE_MEM\n"); - return -ENOENT; - } - crc->regs = devm_ioremap_resource(dev, res); if (IS_ERR((void *)crc->regs)) { dev_err(&pdev->dev, "Cannot map CRC IO\n"); -- cgit v1.2.3 From e24860f2a6b50eb09f3a627e168433005798f8d9 Mon Sep 17 00:00:00 2001 From: Conor McLoughlin Date: Mon, 4 Jul 2016 16:26:00 +0100 Subject: crypto: qat - Use alternative reset methods depending on the specific device Different product families will use FLR or SBR. Virtual Function devices have no reset method. Signed-off-by: Conor McLoughlin Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c | 1 + drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c | 1 + drivers/crypto/qat/qat_common/adf_accel_devices.h | 1 + drivers/crypto/qat/qat_common/adf_aer.c | 46 +++++++++++++++++----- drivers/crypto/qat/qat_common/adf_common_drv.h | 2 + .../crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c | 1 + 6 files changed, 43 insertions(+), 9 deletions(-) (limited to 'drivers/crypto') diff --git a/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c b/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c index c5bd5a9abc4d..6bc68bc00d76 100644 --- a/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c +++ b/drivers/crypto/qat/qat_c3xxx/adf_c3xxx_hw_data.c @@ -229,6 +229,7 @@ void adf_init_hw_data_c3xxx(struct adf_hw_device_data *hw_data) hw_data->get_arb_mapping = adf_get_arbiter_mapping; hw_data->enable_ints = adf_enable_ints; hw_data->enable_vf2pf_comms = adf_pf_enable_vf2pf_comms; + hw_data->reset_device = adf_reset_flr; hw_data->min_iov_compat_ver = ADF_PFVF_COMPATIBILITY_VERSION; } diff --git a/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c b/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c index 879e04cae714..618cec360b39 100644 --- a/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c +++ b/drivers/crypto/qat/qat_c62x/adf_c62x_hw_data.c @@ -239,6 +239,7 @@ void adf_init_hw_data_c62x(struct adf_hw_device_data *hw_data) hw_data->get_arb_mapping = adf_get_arbiter_mapping; hw_data->enable_ints = adf_enable_ints; hw_data->enable_vf2pf_comms = adf_pf_enable_vf2pf_comms; + hw_data->reset_device = adf_reset_flr; hw_data->min_iov_compat_ver = ADF_PFVF_COMPATIBILITY_VERSION; } diff --git a/drivers/crypto/qat/qat_common/adf_accel_devices.h b/drivers/crypto/qat/qat_common/adf_accel_devices.h index 5a07208ce778..e8822536530b 100644 --- a/drivers/crypto/qat/qat_common/adf_accel_devices.h +++ b/drivers/crypto/qat/qat_common/adf_accel_devices.h @@ -176,6 +176,7 @@ struct adf_hw_device_data { void (*disable_iov)(struct adf_accel_dev *accel_dev); void (*enable_ints)(struct adf_accel_dev *accel_dev); int (*enable_vf2pf_comms)(struct adf_accel_dev *accel_dev); + void (*reset_device)(struct adf_accel_dev *accel_dev); const char *fw_name; const char *fw_mmp_name; uint32_t fuses; diff --git a/drivers/crypto/qat/qat_common/adf_aer.c b/drivers/crypto/qat/qat_common/adf_aer.c index 7bfb57f1bcb5..2839fccdd84b 100644 --- a/drivers/crypto/qat/qat_common/adf_aer.c +++ b/drivers/crypto/qat/qat_common/adf_aer.c @@ -82,18 +82,12 @@ struct adf_reset_dev_data { struct work_struct reset_work; }; -void adf_dev_restore(struct adf_accel_dev *accel_dev) +void adf_reset_sbr(struct adf_accel_dev *accel_dev) { struct pci_dev *pdev = accel_to_pci_dev(accel_dev); struct pci_dev *parent = pdev->bus->self; uint16_t bridge_ctl = 0; - if (accel_dev->is_vf) - return; - - dev_info(&GET_DEV(accel_dev), "Resetting device qat_dev%d\n", - accel_dev->accel_id); - if (!parent) parent = pdev; @@ -101,6 +95,8 @@ void adf_dev_restore(struct adf_accel_dev *accel_dev) dev_info(&GET_DEV(accel_dev), "Transaction still in progress. Proceeding\n"); + dev_info(&GET_DEV(accel_dev), "Secondary bus reset\n"); + pci_read_config_word(parent, PCI_BRIDGE_CONTROL, &bridge_ctl); bridge_ctl |= PCI_BRIDGE_CTL_BUS_RESET; pci_write_config_word(parent, PCI_BRIDGE_CONTROL, bridge_ctl); @@ -108,8 +104,40 @@ void adf_dev_restore(struct adf_accel_dev *accel_dev) bridge_ctl &= ~PCI_BRIDGE_CTL_BUS_RESET; pci_write_config_word(parent, PCI_BRIDGE_CONTROL, bridge_ctl); msleep(100); - pci_restore_state(pdev); - pci_save_state(pdev); +} +EXPORT_SYMBOL_GPL(adf_reset_sbr); + +void adf_reset_flr(struct adf_accel_dev *accel_dev) +{ + struct pci_dev *pdev = accel_to_pci_dev(accel_dev); + u16 control = 0; + int pos = 0; + + dev_info(&GET_DEV(accel_dev), "Function level reset\n"); + pos = pci_pcie_cap(pdev); + if (!pos) { + dev_err(&GET_DEV(accel_dev), "Restart device failed\n"); + return; + } + pci_read_config_word(pdev, pos + PCI_EXP_DEVCTL, &control); + control |= PCI_EXP_DEVCTL_BCR_FLR; + pci_write_config_word(pdev, pos + PCI_EXP_DEVCTL, control); + msleep(100); +} +EXPORT_SYMBOL_GPL(adf_reset_flr); + +void adf_dev_restore(struct adf_accel_dev *accel_dev) +{ + struct adf_hw_device_data *hw_device = accel_dev->hw_device; + struct pci_dev *pdev = accel_to_pci_dev(accel_dev); + + if (hw_device->reset_device) { + dev_info(&GET_DEV(accel_dev), "Resetting device qat_dev%d\n", + accel_dev->accel_id); + hw_device->reset_device(accel_dev); + pci_restore_state(pdev); + pci_save_state(pdev); + } } static void adf_device_reset_worker(struct work_struct *work) diff --git a/drivers/crypto/qat/qat_common/adf_common_drv.h b/drivers/crypto/qat/qat_common/adf_common_drv.h index 75faa39bc8d0..980e07475012 100644 --- a/drivers/crypto/qat/qat_common/adf_common_drv.h +++ b/drivers/crypto/qat/qat_common/adf_common_drv.h @@ -141,6 +141,8 @@ int adf_ae_stop(struct adf_accel_dev *accel_dev); int adf_enable_aer(struct adf_accel_dev *accel_dev, struct pci_driver *adf); void adf_disable_aer(struct adf_accel_dev *accel_dev); +void adf_reset_sbr(struct adf_accel_dev *accel_dev); +void adf_reset_flr(struct adf_accel_dev *accel_dev); void adf_dev_restore(struct adf_accel_dev *accel_dev); int adf_init_aer(void); void adf_exit_aer(void); diff --git a/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c b/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c index 6e1d5e185526..1dfcab317bed 100644 --- a/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c +++ b/drivers/crypto/qat/qat_dh895xcc/adf_dh895xcc_hw_data.c @@ -252,6 +252,7 @@ void adf_init_hw_data_dh895xcc(struct adf_hw_device_data *hw_data) hw_data->get_arb_mapping = adf_get_arbiter_mapping; hw_data->enable_ints = adf_enable_ints; hw_data->enable_vf2pf_comms = adf_pf_enable_vf2pf_comms; + hw_data->reset_device = adf_reset_sbr; hw_data->min_iov_compat_ver = ADF_PFVF_COMPATIBILITY_VERSION; } -- cgit v1.2.3 From 879f77e9071f029e1c9bd5a75814ecf51370f846 Mon Sep 17 00:00:00 2001 From: Salvatore Benedetto Date: Mon, 4 Jul 2016 17:21:40 +0100 Subject: crypto: qat - Add RSA CRT mode Extend qat driver to use RSA CRT mode when all CRT related components are present in the private key. Simplify code in qat_rsa_setkey by adding qat_rsa_clear_ctx. Signed-off-by: Salvatore Benedetto Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_common/qat_asym_algs.c | 234 +++++++++++++++++++++++--- 1 file changed, 209 insertions(+), 25 deletions(-) (limited to 'drivers/crypto') diff --git a/drivers/crypto/qat/qat_common/qat_asym_algs.c b/drivers/crypto/qat/qat_common/qat_asym_algs.c index 04b0ef8cfaa1..eaff02a3b1ac 100644 --- a/drivers/crypto/qat/qat_common/qat_asym_algs.c +++ b/drivers/crypto/qat/qat_common/qat_asym_algs.c @@ -73,6 +73,14 @@ struct qat_rsa_input_params { dma_addr_t d; dma_addr_t n; } dec; + struct { + dma_addr_t c; + dma_addr_t p; + dma_addr_t q; + dma_addr_t dp; + dma_addr_t dq; + dma_addr_t qinv; + } dec_crt; u64 in_tab[8]; }; } __packed __aligned(64); @@ -93,10 +101,21 @@ struct qat_rsa_ctx { char *n; char *e; char *d; + char *p; + char *q; + char *dp; + char *dq; + char *qinv; dma_addr_t dma_n; dma_addr_t dma_e; dma_addr_t dma_d; + dma_addr_t dma_p; + dma_addr_t dma_q; + dma_addr_t dma_dp; + dma_addr_t dma_dq; + dma_addr_t dma_qinv; unsigned int key_sz; + bool crt_mode; struct qat_crypto_instance *inst; } __packed __aligned(64); @@ -235,6 +254,35 @@ static unsigned long qat_rsa_dec_fn_id(unsigned int len) }; } +#define PKE_RSA_DP2_512 0x1c131b57 +#define PKE_RSA_DP2_1024 0x26131c2d +#define PKE_RSA_DP2_1536 0x45111d12 +#define PKE_RSA_DP2_2048 0x59121dfa +#define PKE_RSA_DP2_3072 0x81121ed9 +#define PKE_RSA_DP2_4096 0xb1111fb2 + +static unsigned long qat_rsa_dec_fn_id_crt(unsigned int len) +{ + unsigned int bitslen = len << 3; + + switch (bitslen) { + case 512: + return PKE_RSA_DP2_512; + case 1024: + return PKE_RSA_DP2_1024; + case 1536: + return PKE_RSA_DP2_1536; + case 2048: + return PKE_RSA_DP2_2048; + case 3072: + return PKE_RSA_DP2_3072; + case 4096: + return PKE_RSA_DP2_4096; + default: + return 0; + }; +} + static int qat_rsa_enc(struct akcipher_request *req) { struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); @@ -388,7 +436,9 @@ static int qat_rsa_dec(struct akcipher_request *req) memset(msg, '\0', sizeof(*msg)); ICP_QAT_FW_PKE_HDR_VALID_FLAG_SET(msg->pke_hdr, ICP_QAT_FW_COMN_REQ_FLAG_SET); - msg->pke_hdr.cd_pars.func_id = qat_rsa_dec_fn_id(ctx->key_sz); + msg->pke_hdr.cd_pars.func_id = ctx->crt_mode ? + qat_rsa_dec_fn_id_crt(ctx->key_sz) : + qat_rsa_dec_fn_id(ctx->key_sz); if (unlikely(!msg->pke_hdr.cd_pars.func_id)) return -EINVAL; @@ -398,8 +448,16 @@ static int qat_rsa_dec(struct akcipher_request *req) ICP_QAT_FW_COMN_FLAGS_BUILD(QAT_COMN_PTR_TYPE_FLAT, QAT_COMN_CD_FLD_TYPE_64BIT_ADR); - qat_req->in.dec.d = ctx->dma_d; - qat_req->in.dec.n = ctx->dma_n; + if (ctx->crt_mode) { + qat_req->in.dec_crt.p = ctx->dma_p; + qat_req->in.dec_crt.q = ctx->dma_q; + qat_req->in.dec_crt.dp = ctx->dma_dp; + qat_req->in.dec_crt.dq = ctx->dma_dq; + qat_req->in.dec_crt.qinv = ctx->dma_qinv; + } else { + qat_req->in.dec.d = ctx->dma_d; + qat_req->in.dec.n = ctx->dma_n; + } ret = -ENOMEM; /* @@ -446,7 +504,10 @@ static int qat_rsa_dec(struct akcipher_request *req) } - qat_req->in.in_tab[3] = 0; + if (ctx->crt_mode) + qat_req->in.in_tab[6] = 0; + else + qat_req->in.in_tab[3] = 0; qat_req->out.out_tab[1] = 0; qat_req->phy_in = dma_map_single(dev, &qat_req->in.dec.c, sizeof(struct qat_rsa_input_params), @@ -463,7 +524,11 @@ static int qat_rsa_dec(struct akcipher_request *req) msg->pke_mid.src_data_addr = qat_req->phy_in; msg->pke_mid.dest_data_addr = qat_req->phy_out; msg->pke_mid.opaque = (uint64_t)(__force long)req; - msg->input_param_count = 3; + if (ctx->crt_mode) + msg->input_param_count = 6; + else + msg->input_param_count = 3; + msg->output_param_count = 1; do { ret = adf_send_message(ctx->inst->pke_tx, (uint32_t *)msg); @@ -583,13 +648,106 @@ err: return ret; } -static int qat_rsa_setkey(struct crypto_akcipher *tfm, const void *key, - unsigned int keylen, bool private) +static void qat_rsa_drop_leading_zeros(const char **ptr, unsigned int *len) { - struct qat_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); - struct device *dev = &GET_DEV(ctx->inst->accel_dev); - struct rsa_key rsa_key; - int ret; + while (!**ptr && *len) { + (*ptr)++; + (*len)--; + } +} + +static void qat_rsa_setkey_crt(struct qat_rsa_ctx *ctx, struct rsa_key *rsa_key) +{ + struct qat_crypto_instance *inst = ctx->inst; + struct device *dev = &GET_DEV(inst->accel_dev); + const char *ptr; + unsigned int len; + unsigned int half_key_sz = ctx->key_sz / 2; + + /* p */ + ptr = rsa_key->p; + len = rsa_key->p_sz; + qat_rsa_drop_leading_zeros(&ptr, &len); + if (!len) + goto err; + ctx->p = dma_zalloc_coherent(dev, half_key_sz, &ctx->dma_p, GFP_KERNEL); + if (!ctx->p) + goto err; + memcpy(ctx->p + (half_key_sz - len), ptr, len); + + /* q */ + ptr = rsa_key->q; + len = rsa_key->q_sz; + qat_rsa_drop_leading_zeros(&ptr, &len); + if (!len) + goto free_p; + ctx->q = dma_zalloc_coherent(dev, half_key_sz, &ctx->dma_q, GFP_KERNEL); + if (!ctx->q) + goto free_p; + memcpy(ctx->q + (half_key_sz - len), ptr, len); + + /* dp */ + ptr = rsa_key->dp; + len = rsa_key->dp_sz; + qat_rsa_drop_leading_zeros(&ptr, &len); + if (!len) + goto free_q; + ctx->dp = dma_zalloc_coherent(dev, half_key_sz, &ctx->dma_dp, + GFP_KERNEL); + if (!ctx->dp) + goto free_q; + memcpy(ctx->dp + (half_key_sz - len), ptr, len); + + /* dq */ + ptr = rsa_key->dq; + len = rsa_key->dq_sz; + qat_rsa_drop_leading_zeros(&ptr, &len); + if (!len) + goto free_dp; + ctx->dq = dma_zalloc_coherent(dev, half_key_sz, &ctx->dma_dq, + GFP_KERNEL); + if (!ctx->dq) + goto free_dp; + memcpy(ctx->dq + (half_key_sz - len), ptr, len); + + /* qinv */ + ptr = rsa_key->qinv; + len = rsa_key->qinv_sz; + qat_rsa_drop_leading_zeros(&ptr, &len); + if (!len) + goto free_dq; + ctx->qinv = dma_zalloc_coherent(dev, half_key_sz, &ctx->dma_qinv, + GFP_KERNEL); + if (!ctx->qinv) + goto free_dq; + memcpy(ctx->qinv + (half_key_sz - len), ptr, len); + + ctx->crt_mode = true; + return; + +free_dq: + memset(ctx->dq, '\0', half_key_sz); + dma_free_coherent(dev, half_key_sz, ctx->dq, ctx->dma_dq); + ctx->dq = NULL; +free_dp: + memset(ctx->dp, '\0', half_key_sz); + dma_free_coherent(dev, half_key_sz, ctx->dp, ctx->dma_dp); + ctx->dp = NULL; +free_q: + memset(ctx->q, '\0', half_key_sz); + dma_free_coherent(dev, half_key_sz, ctx->q, ctx->dma_q); + ctx->q = NULL; +free_p: + memset(ctx->p, '\0', half_key_sz); + dma_free_coherent(dev, half_key_sz, ctx->p, ctx->dma_p); + ctx->p = NULL; +err: + ctx->crt_mode = false; +} + +static void qat_rsa_clear_ctx(struct device *dev, struct qat_rsa_ctx *ctx) +{ + unsigned int half_key_sz = ctx->key_sz / 2; /* Free the old key if any */ if (ctx->n) @@ -600,10 +758,48 @@ static int qat_rsa_setkey(struct crypto_akcipher *tfm, const void *key, memset(ctx->d, '\0', ctx->key_sz); dma_free_coherent(dev, ctx->key_sz, ctx->d, ctx->dma_d); } + if (ctx->p) { + memset(ctx->p, '\0', half_key_sz); + dma_free_coherent(dev, half_key_sz, ctx->p, ctx->dma_p); + } + if (ctx->q) { + memset(ctx->q, '\0', half_key_sz); + dma_free_coherent(dev, half_key_sz, ctx->q, ctx->dma_q); + } + if (ctx->dp) { + memset(ctx->dp, '\0', half_key_sz); + dma_free_coherent(dev, half_key_sz, ctx->dp, ctx->dma_dp); + } + if (ctx->dq) { + memset(ctx->dq, '\0', half_key_sz); + dma_free_coherent(dev, half_key_sz, ctx->dq, ctx->dma_dq); + } + if (ctx->qinv) { + memset(ctx->qinv, '\0', half_key_sz); + dma_free_coherent(dev, half_key_sz, ctx->qinv, ctx->dma_qinv); + } ctx->n = NULL; ctx->e = NULL; ctx->d = NULL; + ctx->p = NULL; + ctx->q = NULL; + ctx->dp = NULL; + ctx->dq = NULL; + ctx->qinv = NULL; + ctx->crt_mode = false; + ctx->key_sz = 0; +} + +static int qat_rsa_setkey(struct crypto_akcipher *tfm, const void *key, + unsigned int keylen, bool private) +{ + struct qat_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct device *dev = &GET_DEV(ctx->inst->accel_dev); + struct rsa_key rsa_key; + int ret; + + qat_rsa_clear_ctx(dev, ctx); if (private) ret = rsa_parse_priv_key(&rsa_key, key, keylen); @@ -622,6 +818,7 @@ static int qat_rsa_setkey(struct crypto_akcipher *tfm, const void *key, ret = qat_rsa_set_d(ctx, rsa_key.d, rsa_key.d_sz); if (ret < 0) goto free; + qat_rsa_setkey_crt(ctx, &rsa_key); } if (!ctx->n || !ctx->e) { @@ -637,20 +834,7 @@ static int qat_rsa_setkey(struct crypto_akcipher *tfm, const void *key, return 0; free: - if (ctx->d) { - memset(ctx->d, '\0', ctx->key_sz); - dma_free_coherent(dev, ctx->key_sz, ctx->d, ctx->dma_d); - ctx->d = NULL; - } - if (ctx->e) { - dma_free_coherent(dev, ctx->key_sz, ctx->e, ctx->dma_e); - ctx->e = NULL; - } - if (ctx->n) { - dma_free_coherent(dev, ctx->key_sz, ctx->n, ctx->dma_n); - ctx->n = NULL; - ctx->key_sz = 0; - } + qat_rsa_clear_ctx(dev, ctx); return ret; } -- cgit v1.2.3 From c9839143ebbf5e821128da44f7e271d745aab19e Mon Sep 17 00:00:00 2001 From: Salvatore Benedetto Date: Thu, 7 Jul 2016 15:27:29 +0100 Subject: crypto: qat - Add DH support Add DH support under kpp api. Drop struct qat_rsa_request and introduce a more generic struct qat_asym_request and share it between RSA and DH requests. Signed-off-by: Salvatore Benedetto Signed-off-by: Herbert Xu --- drivers/crypto/qat/Kconfig | 1 + drivers/crypto/qat/qat_common/qat_asym_algs.c | 593 ++++++++++++++++++++++---- 2 files changed, 522 insertions(+), 72 deletions(-) (limited to 'drivers/crypto') diff --git a/drivers/crypto/qat/Kconfig b/drivers/crypto/qat/Kconfig index 571d04dda415..ce3cae40f949 100644 --- a/drivers/crypto/qat/Kconfig +++ b/drivers/crypto/qat/Kconfig @@ -4,6 +4,7 @@ config CRYPTO_DEV_QAT select CRYPTO_AUTHENC select CRYPTO_BLKCIPHER select CRYPTO_AKCIPHER + select CRYPTO_DH select CRYPTO_HMAC select CRYPTO_RSA select CRYPTO_SHA1 diff --git a/drivers/crypto/qat/qat_common/qat_asym_algs.c b/drivers/crypto/qat/qat_common/qat_asym_algs.c index eaff02a3b1ac..3d56fb82f48a 100644 --- a/drivers/crypto/qat/qat_common/qat_asym_algs.c +++ b/drivers/crypto/qat/qat_common/qat_asym_algs.c @@ -49,6 +49,9 @@ #include #include #include +#include +#include +#include #include #include #include @@ -119,36 +122,454 @@ struct qat_rsa_ctx { struct qat_crypto_instance *inst; } __packed __aligned(64); -struct qat_rsa_request { - struct qat_rsa_input_params in; - struct qat_rsa_output_params out; +struct qat_dh_input_params { + union { + struct { + dma_addr_t b; + dma_addr_t xa; + dma_addr_t p; + } in; + struct { + dma_addr_t xa; + dma_addr_t p; + } in_g2; + u64 in_tab[8]; + }; +} __packed __aligned(64); + +struct qat_dh_output_params { + union { + dma_addr_t r; + u64 out_tab[8]; + }; +} __packed __aligned(64); + +struct qat_dh_ctx { + char *g; + char *xa; + char *p; + dma_addr_t dma_g; + dma_addr_t dma_xa; + dma_addr_t dma_p; + unsigned int p_size; + bool g2; + struct qat_crypto_instance *inst; +} __packed __aligned(64); + +struct qat_asym_request { + union { + struct qat_rsa_input_params rsa; + struct qat_dh_input_params dh; + } in; + union { + struct qat_rsa_output_params rsa; + struct qat_dh_output_params dh; + } out; dma_addr_t phy_in; dma_addr_t phy_out; char *src_align; char *dst_align; struct icp_qat_fw_pke_request req; - struct qat_rsa_ctx *ctx; + union { + struct qat_rsa_ctx *rsa; + struct qat_dh_ctx *dh; + } ctx; + union { + struct akcipher_request *rsa; + struct kpp_request *dh; + } areq; int err; + void (*cb)(struct icp_qat_fw_pke_resp *resp); } __aligned(64); +static void qat_dh_cb(struct icp_qat_fw_pke_resp *resp) +{ + struct qat_asym_request *req = (void *)(__force long)resp->opaque; + struct kpp_request *areq = req->areq.dh; + struct device *dev = &GET_DEV(req->ctx.dh->inst->accel_dev); + int err = ICP_QAT_FW_PKE_RESP_PKE_STAT_GET( + resp->pke_resp_hdr.comn_resp_flags); + + err = (err == ICP_QAT_FW_COMN_STATUS_FLAG_OK) ? 0 : -EINVAL; + + if (areq->src) { + if (req->src_align) + dma_free_coherent(dev, req->ctx.dh->p_size, + req->src_align, req->in.dh.in.b); + else + dma_unmap_single(dev, req->in.dh.in.b, + req->ctx.dh->p_size, DMA_TO_DEVICE); + } + + areq->dst_len = req->ctx.dh->p_size; + if (req->dst_align) { + scatterwalk_map_and_copy(req->dst_align, areq->dst, 0, + areq->dst_len, 1); + + dma_free_coherent(dev, req->ctx.dh->p_size, req->dst_align, + req->out.dh.r); + } else { + dma_unmap_single(dev, req->out.dh.r, req->ctx.dh->p_size, + DMA_FROM_DEVICE); + } + + dma_unmap_single(dev, req->phy_in, sizeof(struct qat_dh_input_params), + DMA_TO_DEVICE); + dma_unmap_single(dev, req->phy_out, + sizeof(struct qat_dh_output_params), + DMA_TO_DEVICE); + + kpp_request_complete(areq, err); +} + +#define PKE_DH_1536 0x390c1a49 +#define PKE_DH_G2_1536 0x2e0b1a3e +#define PKE_DH_2048 0x4d0c1a60 +#define PKE_DH_G2_2048 0x3e0b1a55 +#define PKE_DH_3072 0x510c1a77 +#define PKE_DH_G2_3072 0x3a0b1a6c +#define PKE_DH_4096 0x690c1a8e +#define PKE_DH_G2_4096 0x4a0b1a83 + +static unsigned long qat_dh_fn_id(unsigned int len, bool g2) +{ + unsigned int bitslen = len << 3; + + switch (bitslen) { + case 1536: + return g2 ? PKE_DH_G2_1536 : PKE_DH_1536; + case 2048: + return g2 ? PKE_DH_G2_2048 : PKE_DH_2048; + case 3072: + return g2 ? PKE_DH_G2_3072 : PKE_DH_3072; + case 4096: + return g2 ? PKE_DH_G2_4096 : PKE_DH_4096; + default: + return 0; + }; +} + +static inline struct qat_dh_ctx *qat_dh_get_params(struct crypto_kpp *tfm) +{ + return kpp_tfm_ctx(tfm); +} + +static int qat_dh_compute_value(struct kpp_request *req) +{ + struct crypto_kpp *tfm = crypto_kpp_reqtfm(req); + struct qat_dh_ctx *ctx = kpp_tfm_ctx(tfm); + struct qat_crypto_instance *inst = ctx->inst; + struct device *dev = &GET_DEV(inst->accel_dev); + struct qat_asym_request *qat_req = + PTR_ALIGN(kpp_request_ctx(req), 64); + struct icp_qat_fw_pke_request *msg = &qat_req->req; + int ret, ctr = 0; + int n_input_params = 0; + + if (unlikely(!ctx->xa)) + return -EINVAL; + + if (req->dst_len < ctx->p_size) { + req->dst_len = ctx->p_size; + return -EOVERFLOW; + } + memset(msg, '\0', sizeof(*msg)); + ICP_QAT_FW_PKE_HDR_VALID_FLAG_SET(msg->pke_hdr, + ICP_QAT_FW_COMN_REQ_FLAG_SET); + + msg->pke_hdr.cd_pars.func_id = qat_dh_fn_id(ctx->p_size, + !req->src && ctx->g2); + if (unlikely(!msg->pke_hdr.cd_pars.func_id)) + return -EINVAL; + + qat_req->cb = qat_dh_cb; + qat_req->ctx.dh = ctx; + qat_req->areq.dh = req; + msg->pke_hdr.service_type = ICP_QAT_FW_COMN_REQ_CPM_FW_PKE; + msg->pke_hdr.comn_req_flags = + ICP_QAT_FW_COMN_FLAGS_BUILD(QAT_COMN_PTR_TYPE_FLAT, + QAT_COMN_CD_FLD_TYPE_64BIT_ADR); + + /* + * If no source is provided use g as base + */ + if (req->src) { + qat_req->in.dh.in.xa = ctx->dma_xa; + qat_req->in.dh.in.p = ctx->dma_p; + n_input_params = 3; + } else { + if (ctx->g2) { + qat_req->in.dh.in_g2.xa = ctx->dma_xa; + qat_req->in.dh.in_g2.p = ctx->dma_p; + n_input_params = 2; + } else { + qat_req->in.dh.in.b = ctx->dma_g; + qat_req->in.dh.in.xa = ctx->dma_xa; + qat_req->in.dh.in.p = ctx->dma_p; + n_input_params = 3; + } + } + + ret = -ENOMEM; + if (req->src) { + /* + * src can be of any size in valid range, but HW expects it to + * be the same as modulo p so in case it is different we need + * to allocate a new buf and copy src data. + * In other case we just need to map the user provided buffer. + * Also need to make sure that it is in contiguous buffer. + */ + if (sg_is_last(req->src) && req->src_len == ctx->p_size) { + qat_req->src_align = NULL; + qat_req->in.dh.in.b = dma_map_single(dev, + sg_virt(req->src), + req->src_len, + DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(dev, + qat_req->in.dh.in.b))) + return ret; + + } else { + int shift = ctx->p_size - req->src_len; + + qat_req->src_align = dma_zalloc_coherent(dev, + ctx->p_size, + &qat_req->in.dh.in.b, + GFP_KERNEL); + if (unlikely(!qat_req->src_align)) + return ret; + + scatterwalk_map_and_copy(qat_req->src_align + shift, + req->src, 0, req->src_len, 0); + } + } + /* + * dst can be of any size in valid range, but HW expects it to be the + * same as modulo m so in case it is different we need to allocate a + * new buf and copy src data. + * In other case we just need to map the user provided buffer. + * Also need to make sure that it is in contiguous buffer. + */ + if (sg_is_last(req->dst) && req->dst_len == ctx->p_size) { + qat_req->dst_align = NULL; + qat_req->out.dh.r = dma_map_single(dev, sg_virt(req->dst), + req->dst_len, + DMA_FROM_DEVICE); + + if (unlikely(dma_mapping_error(dev, qat_req->out.dh.r))) + goto unmap_src; + + } else { + qat_req->dst_align = dma_zalloc_coherent(dev, ctx->p_size, + &qat_req->out.dh.r, + GFP_KERNEL); + if (unlikely(!qat_req->dst_align)) + goto unmap_src; + } + + qat_req->in.dh.in_tab[n_input_params] = 0; + qat_req->out.dh.out_tab[1] = 0; + /* Mapping in.in.b or in.in_g2.xa is the same */ + qat_req->phy_in = dma_map_single(dev, &qat_req->in.dh.in.b, + sizeof(struct qat_dh_input_params), + DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(dev, qat_req->phy_in))) + goto unmap_dst; + + qat_req->phy_out = dma_map_single(dev, &qat_req->out.dh.r, + sizeof(struct qat_dh_output_params), + DMA_TO_DEVICE); + if (unlikely(dma_mapping_error(dev, qat_req->phy_out))) + goto unmap_in_params; + + msg->pke_mid.src_data_addr = qat_req->phy_in; + msg->pke_mid.dest_data_addr = qat_req->phy_out; + msg->pke_mid.opaque = (uint64_t)(__force long)qat_req; + msg->input_param_count = n_input_params; + msg->output_param_count = 1; + + do { + ret = adf_send_message(ctx->inst->pke_tx, (uint32_t *)msg); + } while (ret == -EBUSY && ctr++ < 100); + + if (!ret) + return -EINPROGRESS; + + if (!dma_mapping_error(dev, qat_req->phy_out)) + dma_unmap_single(dev, qat_req->phy_out, + sizeof(struct qat_dh_output_params), + DMA_TO_DEVICE); +unmap_in_params: + if (!dma_mapping_error(dev, qat_req->phy_in)) + dma_unmap_single(dev, qat_req->phy_in, + sizeof(struct qat_dh_input_params), + DMA_TO_DEVICE); +unmap_dst: + if (qat_req->dst_align) + dma_free_coherent(dev, ctx->p_size, qat_req->dst_align, + qat_req->out.dh.r); + else + if (!dma_mapping_error(dev, qat_req->out.dh.r)) + dma_unmap_single(dev, qat_req->out.dh.r, ctx->p_size, + DMA_FROM_DEVICE); +unmap_src: + if (req->src) { + if (qat_req->src_align) + dma_free_coherent(dev, ctx->p_size, qat_req->src_align, + qat_req->in.dh.in.b); + else + if (!dma_mapping_error(dev, qat_req->in.dh.in.b)) + dma_unmap_single(dev, qat_req->in.dh.in.b, + ctx->p_size, + DMA_TO_DEVICE); + } + return ret; +} + +static int qat_dh_check_params_length(unsigned int p_len) +{ + switch (p_len) { + case 1536: + case 2048: + case 3072: + case 4096: + return 0; + } + return -EINVAL; +} + +static int qat_dh_set_params(struct qat_dh_ctx *ctx, struct dh *params) +{ + struct qat_crypto_instance *inst = ctx->inst; + struct device *dev = &GET_DEV(inst->accel_dev); + + if (unlikely(!params->p || !params->g)) + return -EINVAL; + + if (qat_dh_check_params_length(params->p_size << 3)) + return -EINVAL; + + ctx->p_size = params->p_size; + ctx->p = dma_zalloc_coherent(dev, ctx->p_size, &ctx->dma_p, GFP_KERNEL); + if (!ctx->p) + return -ENOMEM; + memcpy(ctx->p, params->p, ctx->p_size); + + /* If g equals 2 don't copy it */ + if (params->g_size == 1 && *(char *)params->g == 0x02) { + ctx->g2 = true; + return 0; + } + + ctx->g = dma_zalloc_coherent(dev, ctx->p_size, &ctx->dma_g, GFP_KERNEL); + if (!ctx->g) { + dma_free_coherent(dev, ctx->p_size, ctx->p, ctx->dma_p); + ctx->p = NULL; + return -ENOMEM; + } + memcpy(ctx->g + (ctx->p_size - params->g_size), params->g, + params->g_size); + + return 0; +} + +static void qat_dh_clear_ctx(struct device *dev, struct qat_dh_ctx *ctx) +{ + if (ctx->g) { + dma_free_coherent(dev, ctx->p_size, ctx->g, ctx->dma_g); + ctx->g = NULL; + } + if (ctx->xa) { + dma_free_coherent(dev, ctx->p_size, ctx->xa, ctx->dma_xa); + ctx->xa = NULL; + } + if (ctx->p) { + dma_free_coherent(dev, ctx->p_size, ctx->p, ctx->dma_p); + ctx->p = NULL; + } + ctx->p_size = 0; + ctx->g2 = false; +} + +static int qat_dh_set_secret(struct crypto_kpp *tfm, void *buf, + unsigned int len) +{ + struct qat_dh_ctx *ctx = kpp_tfm_ctx(tfm); + struct device *dev = &GET_DEV(ctx->inst->accel_dev); + struct dh params; + int ret; + + if (crypto_dh_decode_key(buf, len, ¶ms) < 0) + return -EINVAL; + + /* Free old secret if any */ + qat_dh_clear_ctx(dev, ctx); + + ret = qat_dh_set_params(ctx, ¶ms); + if (ret < 0) + return ret; + + ctx->xa = dma_zalloc_coherent(dev, ctx->p_size, &ctx->dma_xa, + GFP_KERNEL); + if (!ctx->xa) { + qat_dh_clear_ctx(dev, ctx); + return -ENOMEM; + } + memcpy(ctx->xa + (ctx->p_size - params.key_size), params.key, + params.key_size); + + return 0; +} + +static int qat_dh_max_size(struct crypto_kpp *tfm) +{ + struct qat_dh_ctx *ctx = kpp_tfm_ctx(tfm); + + return ctx->p ? ctx->p_size : -EINVAL; +} + +static int qat_dh_init_tfm(struct crypto_kpp *tfm) +{ + struct qat_dh_ctx *ctx = kpp_tfm_ctx(tfm); + struct qat_crypto_instance *inst = + qat_crypto_get_instance_node(get_current_node()); + + if (!inst) + return -EINVAL; + + ctx->p_size = 0; + ctx->g2 = false; + ctx->inst = inst; + return 0; +} + +static void qat_dh_exit_tfm(struct crypto_kpp *tfm) +{ + struct qat_dh_ctx *ctx = kpp_tfm_ctx(tfm); + struct device *dev = &GET_DEV(ctx->inst->accel_dev); + + qat_dh_clear_ctx(dev, ctx); + qat_crypto_put_instance(ctx->inst); +} + static void qat_rsa_cb(struct icp_qat_fw_pke_resp *resp) { - struct akcipher_request *areq = (void *)(__force long)resp->opaque; - struct qat_rsa_request *req = PTR_ALIGN(akcipher_request_ctx(areq), 64); - struct device *dev = &GET_DEV(req->ctx->inst->accel_dev); + struct qat_asym_request *req = (void *)(__force long)resp->opaque; + struct akcipher_request *areq = req->areq.rsa; + struct device *dev = &GET_DEV(req->ctx.rsa->inst->accel_dev); int err = ICP_QAT_FW_PKE_RESP_PKE_STAT_GET( resp->pke_resp_hdr.comn_resp_flags); err = (err == ICP_QAT_FW_COMN_STATUS_FLAG_OK) ? 0 : -EINVAL; if (req->src_align) - dma_free_coherent(dev, req->ctx->key_sz, req->src_align, - req->in.enc.m); + dma_free_coherent(dev, req->ctx.rsa->key_sz, req->src_align, + req->in.rsa.enc.m); else - dma_unmap_single(dev, req->in.enc.m, req->ctx->key_sz, + dma_unmap_single(dev, req->in.rsa.enc.m, req->ctx.rsa->key_sz, DMA_TO_DEVICE); - areq->dst_len = req->ctx->key_sz; + areq->dst_len = req->ctx.rsa->key_sz; if (req->dst_align) { char *ptr = req->dst_align; @@ -157,14 +578,14 @@ static void qat_rsa_cb(struct icp_qat_fw_pke_resp *resp) ptr++; } - if (areq->dst_len != req->ctx->key_sz) + if (areq->dst_len != req->ctx.rsa->key_sz) memmove(req->dst_align, ptr, areq->dst_len); scatterwalk_map_and_copy(req->dst_align, areq->dst, 0, areq->dst_len, 1); - dma_free_coherent(dev, req->ctx->key_sz, req->dst_align, - req->out.enc.c); + dma_free_coherent(dev, req->ctx.rsa->key_sz, req->dst_align, + req->out.rsa.enc.c); } else { char *ptr = sg_virt(areq->dst); @@ -176,7 +597,7 @@ static void qat_rsa_cb(struct icp_qat_fw_pke_resp *resp) if (sg_virt(areq->dst) != ptr && areq->dst_len) memmove(sg_virt(areq->dst), ptr, areq->dst_len); - dma_unmap_single(dev, req->out.enc.c, req->ctx->key_sz, + dma_unmap_single(dev, req->out.rsa.enc.c, req->ctx.rsa->key_sz, DMA_FROM_DEVICE); } @@ -192,8 +613,9 @@ static void qat_rsa_cb(struct icp_qat_fw_pke_resp *resp) void qat_alg_asym_callback(void *_resp) { struct icp_qat_fw_pke_resp *resp = _resp; + struct qat_asym_request *areq = (void *)(__force long)resp->opaque; - qat_rsa_cb(resp); + areq->cb(resp); } #define PKE_RSA_EP_512 0x1c161b21 @@ -289,7 +711,7 @@ static int qat_rsa_enc(struct akcipher_request *req) struct qat_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); struct qat_crypto_instance *inst = ctx->inst; struct device *dev = &GET_DEV(inst->accel_dev); - struct qat_rsa_request *qat_req = + struct qat_asym_request *qat_req = PTR_ALIGN(akcipher_request_ctx(req), 64); struct icp_qat_fw_pke_request *msg = &qat_req->req; int ret, ctr = 0; @@ -308,14 +730,16 @@ static int qat_rsa_enc(struct akcipher_request *req) if (unlikely(!msg->pke_hdr.cd_pars.func_id)) return -EINVAL; - qat_req->ctx = ctx; + qat_req->cb = qat_rsa_cb; + qat_req->ctx.rsa = ctx; + qat_req->areq.rsa = req; msg->pke_hdr.service_type = ICP_QAT_FW_COMN_REQ_CPM_FW_PKE; msg->pke_hdr.comn_req_flags = ICP_QAT_FW_COMN_FLAGS_BUILD(QAT_COMN_PTR_TYPE_FLAT, QAT_COMN_CD_FLD_TYPE_64BIT_ADR); - qat_req->in.enc.e = ctx->dma_e; - qat_req->in.enc.n = ctx->dma_n; + qat_req->in.rsa.enc.e = ctx->dma_e; + qat_req->in.rsa.enc.n = ctx->dma_n; ret = -ENOMEM; /* @@ -327,16 +751,16 @@ static int qat_rsa_enc(struct akcipher_request *req) */ if (sg_is_last(req->src) && req->src_len == ctx->key_sz) { qat_req->src_align = NULL; - qat_req->in.enc.m = dma_map_single(dev, sg_virt(req->src), + qat_req->in.rsa.enc.m = dma_map_single(dev, sg_virt(req->src), req->src_len, DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(dev, qat_req->in.enc.m))) + if (unlikely(dma_mapping_error(dev, qat_req->in.rsa.enc.m))) return ret; } else { int shift = ctx->key_sz - req->src_len; qat_req->src_align = dma_zalloc_coherent(dev, ctx->key_sz, - &qat_req->in.enc.m, + &qat_req->in.rsa.enc.m, GFP_KERNEL); if (unlikely(!qat_req->src_align)) return ret; @@ -346,30 +770,30 @@ static int qat_rsa_enc(struct akcipher_request *req) } if (sg_is_last(req->dst) && req->dst_len == ctx->key_sz) { qat_req->dst_align = NULL; - qat_req->out.enc.c = dma_map_single(dev, sg_virt(req->dst), - req->dst_len, - DMA_FROM_DEVICE); + qat_req->out.rsa.enc.c = dma_map_single(dev, sg_virt(req->dst), + req->dst_len, + DMA_FROM_DEVICE); - if (unlikely(dma_mapping_error(dev, qat_req->out.enc.c))) + if (unlikely(dma_mapping_error(dev, qat_req->out.rsa.enc.c))) goto unmap_src; } else { qat_req->dst_align = dma_zalloc_coherent(dev, ctx->key_sz, - &qat_req->out.enc.c, + &qat_req->out.rsa.enc.c, GFP_KERNEL); if (unlikely(!qat_req->dst_align)) goto unmap_src; } - qat_req->in.in_tab[3] = 0; - qat_req->out.out_tab[1] = 0; - qat_req->phy_in = dma_map_single(dev, &qat_req->in.enc.m, + qat_req->in.rsa.in_tab[3] = 0; + qat_req->out.rsa.out_tab[1] = 0; + qat_req->phy_in = dma_map_single(dev, &qat_req->in.rsa.enc.m, sizeof(struct qat_rsa_input_params), DMA_TO_DEVICE); if (unlikely(dma_mapping_error(dev, qat_req->phy_in))) goto unmap_dst; - qat_req->phy_out = dma_map_single(dev, &qat_req->out.enc.c, + qat_req->phy_out = dma_map_single(dev, &qat_req->out.rsa.enc.c, sizeof(struct qat_rsa_output_params), DMA_TO_DEVICE); if (unlikely(dma_mapping_error(dev, qat_req->phy_out))) @@ -377,7 +801,7 @@ static int qat_rsa_enc(struct akcipher_request *req) msg->pke_mid.src_data_addr = qat_req->phy_in; msg->pke_mid.dest_data_addr = qat_req->phy_out; - msg->pke_mid.opaque = (uint64_t)(__force long)req; + msg->pke_mid.opaque = (uint64_t)(__force long)qat_req; msg->input_param_count = 3; msg->output_param_count = 1; do { @@ -399,19 +823,19 @@ unmap_in_params: unmap_dst: if (qat_req->dst_align) dma_free_coherent(dev, ctx->key_sz, qat_req->dst_align, - qat_req->out.enc.c); + qat_req->out.rsa.enc.c); else - if (!dma_mapping_error(dev, qat_req->out.enc.c)) - dma_unmap_single(dev, qat_req->out.enc.c, ctx->key_sz, - DMA_FROM_DEVICE); + if (!dma_mapping_error(dev, qat_req->out.rsa.enc.c)) + dma_unmap_single(dev, qat_req->out.rsa.enc.c, + ctx->key_sz, DMA_FROM_DEVICE); unmap_src: if (qat_req->src_align) dma_free_coherent(dev, ctx->key_sz, qat_req->src_align, - qat_req->in.enc.m); + qat_req->in.rsa.enc.m); else - if (!dma_mapping_error(dev, qat_req->in.enc.m)) - dma_unmap_single(dev, qat_req->in.enc.m, ctx->key_sz, - DMA_TO_DEVICE); + if (!dma_mapping_error(dev, qat_req->in.rsa.enc.m)) + dma_unmap_single(dev, qat_req->in.rsa.enc.m, + ctx->key_sz, DMA_TO_DEVICE); return ret; } @@ -421,7 +845,7 @@ static int qat_rsa_dec(struct akcipher_request *req) struct qat_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); struct qat_crypto_instance *inst = ctx->inst; struct device *dev = &GET_DEV(inst->accel_dev); - struct qat_rsa_request *qat_req = + struct qat_asym_request *qat_req = PTR_ALIGN(akcipher_request_ctx(req), 64); struct icp_qat_fw_pke_request *msg = &qat_req->req; int ret, ctr = 0; @@ -442,21 +866,23 @@ static int qat_rsa_dec(struct akcipher_request *req) if (unlikely(!msg->pke_hdr.cd_pars.func_id)) return -EINVAL; - qat_req->ctx = ctx; + qat_req->cb = qat_rsa_cb; + qat_req->ctx.rsa = ctx; + qat_req->areq.rsa = req; msg->pke_hdr.service_type = ICP_QAT_FW_COMN_REQ_CPM_FW_PKE; msg->pke_hdr.comn_req_flags = ICP_QAT_FW_COMN_FLAGS_BUILD(QAT_COMN_PTR_TYPE_FLAT, QAT_COMN_CD_FLD_TYPE_64BIT_ADR); if (ctx->crt_mode) { - qat_req->in.dec_crt.p = ctx->dma_p; - qat_req->in.dec_crt.q = ctx->dma_q; - qat_req->in.dec_crt.dp = ctx->dma_dp; - qat_req->in.dec_crt.dq = ctx->dma_dq; - qat_req->in.dec_crt.qinv = ctx->dma_qinv; + qat_req->in.rsa.dec_crt.p = ctx->dma_p; + qat_req->in.rsa.dec_crt.q = ctx->dma_q; + qat_req->in.rsa.dec_crt.dp = ctx->dma_dp; + qat_req->in.rsa.dec_crt.dq = ctx->dma_dq; + qat_req->in.rsa.dec_crt.qinv = ctx->dma_qinv; } else { - qat_req->in.dec.d = ctx->dma_d; - qat_req->in.dec.n = ctx->dma_n; + qat_req->in.rsa.dec.d = ctx->dma_d; + qat_req->in.rsa.dec.n = ctx->dma_n; } ret = -ENOMEM; @@ -469,16 +895,16 @@ static int qat_rsa_dec(struct akcipher_request *req) */ if (sg_is_last(req->src) && req->src_len == ctx->key_sz) { qat_req->src_align = NULL; - qat_req->in.dec.c = dma_map_single(dev, sg_virt(req->src), + qat_req->in.rsa.dec.c = dma_map_single(dev, sg_virt(req->src), req->dst_len, DMA_TO_DEVICE); - if (unlikely(dma_mapping_error(dev, qat_req->in.dec.c))) + if (unlikely(dma_mapping_error(dev, qat_req->in.rsa.dec.c))) return ret; } else { int shift = ctx->key_sz - req->src_len; qat_req->src_align = dma_zalloc_coherent(dev, ctx->key_sz, - &qat_req->in.dec.c, + &qat_req->in.rsa.dec.c, GFP_KERNEL); if (unlikely(!qat_req->src_align)) return ret; @@ -488,16 +914,16 @@ static int qat_rsa_dec(struct akcipher_request *req) } if (sg_is_last(req->dst) && req->dst_len == ctx->key_sz) { qat_req->dst_align = NULL; - qat_req->out.dec.m = dma_map_single(dev, sg_virt(req->dst), + qat_req->out.rsa.dec.m = dma_map_single(dev, sg_virt(req->dst), req->dst_len, DMA_FROM_DEVICE); - if (unlikely(dma_mapping_error(dev, qat_req->out.dec.m))) + if (unlikely(dma_mapping_error(dev, qat_req->out.rsa.dec.m))) goto unmap_src; } else { qat_req->dst_align = dma_zalloc_coherent(dev, ctx->key_sz, - &qat_req->out.dec.m, + &qat_req->out.rsa.dec.m, GFP_KERNEL); if (unlikely(!qat_req->dst_align)) goto unmap_src; @@ -505,17 +931,17 @@ static int qat_rsa_dec(struct akcipher_request *req) } if (ctx->crt_mode) - qat_req->in.in_tab[6] = 0; + qat_req->in.rsa.in_tab[6] = 0; else - qat_req->in.in_tab[3] = 0; - qat_req->out.out_tab[1] = 0; - qat_req->phy_in = dma_map_single(dev, &qat_req->in.dec.c, + qat_req->in.rsa.in_tab[3] = 0; + qat_req->out.rsa.out_tab[1] = 0; + qat_req->phy_in = dma_map_single(dev, &qat_req->in.rsa.dec.c, sizeof(struct qat_rsa_input_params), DMA_TO_DEVICE); if (unlikely(dma_mapping_error(dev, qat_req->phy_in))) goto unmap_dst; - qat_req->phy_out = dma_map_single(dev, &qat_req->out.dec.m, + qat_req->phy_out = dma_map_single(dev, &qat_req->out.rsa.dec.m, sizeof(struct qat_rsa_output_params), DMA_TO_DEVICE); if (unlikely(dma_mapping_error(dev, qat_req->phy_out))) @@ -523,7 +949,7 @@ static int qat_rsa_dec(struct akcipher_request *req) msg->pke_mid.src_data_addr = qat_req->phy_in; msg->pke_mid.dest_data_addr = qat_req->phy_out; - msg->pke_mid.opaque = (uint64_t)(__force long)req; + msg->pke_mid.opaque = (uint64_t)(__force long)qat_req; if (ctx->crt_mode) msg->input_param_count = 6; else @@ -549,19 +975,19 @@ unmap_in_params: unmap_dst: if (qat_req->dst_align) dma_free_coherent(dev, ctx->key_sz, qat_req->dst_align, - qat_req->out.dec.m); + qat_req->out.rsa.dec.m); else - if (!dma_mapping_error(dev, qat_req->out.dec.m)) - dma_unmap_single(dev, qat_req->out.dec.m, ctx->key_sz, - DMA_FROM_DEVICE); + if (!dma_mapping_error(dev, qat_req->out.rsa.dec.m)) + dma_unmap_single(dev, qat_req->out.rsa.dec.m, + ctx->key_sz, DMA_FROM_DEVICE); unmap_src: if (qat_req->src_align) dma_free_coherent(dev, ctx->key_sz, qat_req->src_align, - qat_req->in.dec.c); + qat_req->in.rsa.dec.c); else - if (!dma_mapping_error(dev, qat_req->in.dec.c)) - dma_unmap_single(dev, qat_req->in.dec.c, ctx->key_sz, - DMA_TO_DEVICE); + if (!dma_mapping_error(dev, qat_req->in.rsa.dec.c)) + dma_unmap_single(dev, qat_req->in.rsa.dec.c, + ctx->key_sz, DMA_TO_DEVICE); return ret; } @@ -900,7 +1326,7 @@ static struct akcipher_alg rsa = { .max_size = qat_rsa_max_size, .init = qat_rsa_init_tfm, .exit = qat_rsa_exit_tfm, - .reqsize = sizeof(struct qat_rsa_request) + 64, + .reqsize = sizeof(struct qat_asym_request) + 64, .base = { .cra_name = "rsa", .cra_driver_name = "qat-rsa", @@ -910,6 +1336,23 @@ static struct akcipher_alg rsa = { }, }; +static struct kpp_alg dh = { + .set_secret = qat_dh_set_secret, + .generate_public_key = qat_dh_compute_value, + .compute_shared_secret = qat_dh_compute_value, + .max_size = qat_dh_max_size, + .init = qat_dh_init_tfm, + .exit = qat_dh_exit_tfm, + .reqsize = sizeof(struct qat_asym_request) + 64, + .base = { + .cra_name = "dh", + .cra_driver_name = "qat-dh", + .cra_priority = 1000, + .cra_module = THIS_MODULE, + .cra_ctxsize = sizeof(struct qat_dh_ctx), + }, +}; + int qat_asym_algs_register(void) { int ret = 0; @@ -918,7 +1361,11 @@ int qat_asym_algs_register(void) if (++active_devs == 1) { rsa.base.cra_flags = 0; ret = crypto_register_akcipher(&rsa); + if (ret) + goto unlock; + ret = crypto_register_kpp(&dh); } +unlock: mutex_unlock(&algs_lock); return ret; } @@ -926,7 +1373,9 @@ int qat_asym_algs_register(void) void qat_asym_algs_unregister(void) { mutex_lock(&algs_lock); - if (--active_devs == 0) + if (--active_devs == 0) { crypto_unregister_akcipher(&rsa); + crypto_unregister_kpp(&dh); + } mutex_unlock(&algs_lock); } -- cgit v1.2.3 From bd76ad4abfdccd26a9ac11214aa715e83bc8e808 Mon Sep 17 00:00:00 2001 From: Salvatore Benedetto Date: Thu, 7 Jul 2016 15:52:17 +0100 Subject: crypto: qat - Stop dropping leading zeros from RSA output There is not need to drop leading zeros from the RSA output operations results. Signed-off-by: Salvatore Benedetto Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_common/qat_asym_algs.c | 20 -------------------- 1 file changed, 20 deletions(-) (limited to 'drivers/crypto') diff --git a/drivers/crypto/qat/qat_common/qat_asym_algs.c b/drivers/crypto/qat/qat_common/qat_asym_algs.c index 3d56fb82f48a..0d35dca2e925 100644 --- a/drivers/crypto/qat/qat_common/qat_asym_algs.c +++ b/drivers/crypto/qat/qat_common/qat_asym_algs.c @@ -571,32 +571,12 @@ static void qat_rsa_cb(struct icp_qat_fw_pke_resp *resp) areq->dst_len = req->ctx.rsa->key_sz; if (req->dst_align) { - char *ptr = req->dst_align; - - while (!(*ptr) && areq->dst_len) { - areq->dst_len--; - ptr++; - } - - if (areq->dst_len != req->ctx.rsa->key_sz) - memmove(req->dst_align, ptr, areq->dst_len); - scatterwalk_map_and_copy(req->dst_align, areq->dst, 0, areq->dst_len, 1); dma_free_coherent(dev, req->ctx.rsa->key_sz, req->dst_align, req->out.rsa.enc.c); } else { - char *ptr = sg_virt(areq->dst); - - while (!(*ptr) && areq->dst_len) { - areq->dst_len--; - ptr++; - } - - if (sg_virt(areq->dst) != ptr && areq->dst_len) - memmove(sg_virt(areq->dst), ptr, areq->dst_len); - dma_unmap_single(dev, req->out.rsa.enc.c, req->ctx.rsa->key_sz, DMA_FROM_DEVICE); } -- cgit v1.2.3 From 7c001a8650e67cb293120044c3f62756258ed27c Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 12 Jul 2016 13:17:52 +0800 Subject: crypto: omap - Stop using crypto scatterwalk_bytes_sglen We already have a generic function sg_nents_for_len which does the same thing. This patch switches omap over to it and also adds error handling in case the SG list is short. Signed-off-by: Herbert Xu --- drivers/crypto/omap-aes.c | 16 ++++++++++------ drivers/crypto/omap-des.c | 14 ++++++++++---- 2 files changed, 20 insertions(+), 10 deletions(-) (limited to 'drivers/crypto') diff --git a/drivers/crypto/omap-aes.c b/drivers/crypto/omap-aes.c index 8178632de788..4ab53a604312 100644 --- a/drivers/crypto/omap-aes.c +++ b/drivers/crypto/omap-aes.c @@ -578,10 +578,12 @@ static int omap_aes_copy_sgs(struct omap_aes_dev *dd) sg_init_table(&dd->in_sgl, 1); sg_set_buf(&dd->in_sgl, buf_in, total); dd->in_sg = &dd->in_sgl; + dd->in_sg_len = 1; sg_init_table(&dd->out_sgl, 1); sg_set_buf(&dd->out_sgl, buf_out, total); dd->out_sg = &dd->out_sgl; + dd->out_sg_len = 1; return 0; } @@ -602,7 +604,6 @@ static int omap_aes_prepare_req(struct crypto_engine *engine, crypto_ablkcipher_reqtfm(req)); struct omap_aes_dev *dd = omap_aes_find_dev(ctx); struct omap_aes_reqctx *rctx; - int len; if (!dd) return -ENODEV; @@ -614,6 +615,14 @@ static int omap_aes_prepare_req(struct crypto_engine *engine, dd->in_sg = req->src; dd->out_sg = req->dst; + dd->in_sg_len = sg_nents_for_len(dd->in_sg, dd->total); + if (dd->in_sg_len < 0) + return dd->in_sg_len; + + dd->out_sg_len = sg_nents_for_len(dd->out_sg, dd->total); + if (dd->out_sg_len < 0) + return dd->out_sg_len; + if (omap_aes_check_aligned(dd->in_sg, dd->total) || omap_aes_check_aligned(dd->out_sg, dd->total)) { if (omap_aes_copy_sgs(dd)) @@ -623,11 +632,6 @@ static int omap_aes_prepare_req(struct crypto_engine *engine, dd->sgs_copied = 0; } - len = ALIGN(dd->total, AES_BLOCK_SIZE); - dd->in_sg_len = scatterwalk_bytes_sglen(dd->in_sg, len); - dd->out_sg_len = scatterwalk_bytes_sglen(dd->out_sg, len); - BUG_ON(dd->in_sg_len < 0 || dd->out_sg_len < 0); - rctx = ablkcipher_request_ctx(req); ctx = crypto_ablkcipher_ctx(crypto_ablkcipher_reqtfm(req)); rctx->mode &= FLAGS_MODE_MASK; diff --git a/drivers/crypto/omap-des.c b/drivers/crypto/omap-des.c index 3eedb03111ba..5691434ffb2d 100644 --- a/drivers/crypto/omap-des.c +++ b/drivers/crypto/omap-des.c @@ -560,10 +560,12 @@ static int omap_des_copy_sgs(struct omap_des_dev *dd) sg_init_table(&dd->in_sgl, 1); sg_set_buf(&dd->in_sgl, buf_in, dd->total); dd->in_sg = &dd->in_sgl; + dd->in_sg_len = 1; sg_init_table(&dd->out_sgl, 1); sg_set_buf(&dd->out_sgl, buf_out, dd->total); dd->out_sg = &dd->out_sgl; + dd->out_sg_len = 1; return 0; } @@ -595,6 +597,14 @@ static int omap_des_prepare_req(struct crypto_engine *engine, dd->in_sg = req->src; dd->out_sg = req->dst; + dd->in_sg_len = sg_nents_for_len(dd->in_sg, dd->total); + if (dd->in_sg_len < 0) + return dd->in_sg_len; + + dd->out_sg_len = sg_nents_for_len(dd->out_sg, dd->total); + if (dd->out_sg_len < 0) + return dd->out_sg_len; + if (omap_des_copy_needed(dd->in_sg) || omap_des_copy_needed(dd->out_sg)) { if (omap_des_copy_sgs(dd)) @@ -604,10 +614,6 @@ static int omap_des_prepare_req(struct crypto_engine *engine, dd->sgs_copied = 0; } - dd->in_sg_len = scatterwalk_bytes_sglen(dd->in_sg, dd->total); - dd->out_sg_len = scatterwalk_bytes_sglen(dd->out_sg, dd->total); - BUG_ON(dd->in_sg_len < 0 || dd->out_sg_len < 0); - rctx = ablkcipher_request_ctx(req); ctx = crypto_ablkcipher_ctx(crypto_ablkcipher_reqtfm(req)); rctx->mode &= FLAGS_MODE_MASK; -- cgit v1.2.3 From e514cc0a492a3f39ef71b31590a7ef67537ee04b Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 15 Jul 2016 14:09:13 +0300 Subject: crypto: nx - off by one bug in nx_of_update_msc() The props->ap[] array is defined like this: struct alg_props ap[NX_MAX_FC][NX_MAX_MODE][3]; So we can see that if msc->fc and msc->mode are == to NX_MAX_FC or NX_MAX_MODE then we're off by one. Fixes: ae0222b7289d ('powerpc/crypto: nx driver code supporting nx encryption') Signed-off-by: Dan Carpenter Signed-off-by: Herbert Xu --- drivers/crypto/nx/nx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/crypto') diff --git a/drivers/crypto/nx/nx.c b/drivers/crypto/nx/nx.c index 0794f1cc0018..42f0f229f7f7 100644 --- a/drivers/crypto/nx/nx.c +++ b/drivers/crypto/nx/nx.c @@ -392,7 +392,7 @@ static void nx_of_update_msc(struct device *dev, ((bytes_so_far + sizeof(struct msc_triplet)) <= lenp) && i < msc->triplets; i++) { - if (msc->fc > NX_MAX_FC || msc->mode > NX_MAX_MODE) { + if (msc->fc >= NX_MAX_FC || msc->mode >= NX_MAX_MODE) { dev_err(dev, "unknown function code/mode " "combo: %d/%d (ignored)\n", msc->fc, msc->mode); -- cgit v1.2.3 From aa6416ee4676e5c235d4c7315340c24457e39430 Mon Sep 17 00:00:00 2001 From: Romain Perier Date: Mon, 18 Jul 2016 11:32:24 +0200 Subject: crypto: marvell - Fix wrong flag used for GFP in mv_cesa_dma_add_iv_op Use the parameter 'gfp_flags' instead of 'flag' as second argument of dma_pool_alloc(). The parameter 'flag' is for the TDMA descriptor, its content has no sense for the allocator. Fixes: bac8e805a30d ("crypto: marvell - Copy IV vectors by DMA...") Signed-off-by: Romain Perier Acked-by: Boris Brezillon Signed-off-by: Herbert Xu --- drivers/crypto/marvell/tdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/crypto') diff --git a/drivers/crypto/marvell/tdma.c b/drivers/crypto/marvell/tdma.c index b0f9f5050dc8..86a065bcc187 100644 --- a/drivers/crypto/marvell/tdma.c +++ b/drivers/crypto/marvell/tdma.c @@ -221,7 +221,7 @@ int mv_cesa_dma_add_iv_op(struct mv_cesa_tdma_chain *chain, dma_addr_t src, if (IS_ERR(tdma)) return PTR_ERR(tdma); - iv = dma_pool_alloc(cesa_dev->dma->iv_pool, flags, &dma_handle); + iv = dma_pool_alloc(cesa_dev->dma->iv_pool, gfp_flags, &dma_handle); if (!iv) return -ENOMEM; -- cgit v1.2.3 From 11c6e16ee13ab68b8ff04c17ab41611a4fcc5c81 Mon Sep 17 00:00:00 2001 From: Paulo Flabiano Smorigo Date: Mon, 18 Jul 2016 12:26:25 -0300 Subject: crypto: vmx - Adding asm subroutines for XTS This patch add XTS subroutines using VMX-crypto driver. It gives a boost of 20 times using XTS. These code has been adopted from OpenSSL project in collaboration with the original author (Andy Polyakov ). Signed-off-by: Leonidas S. Barbosa Signed-off-by: Paulo Flabiano Smorigo Signed-off-by: Herbert Xu --- drivers/crypto/vmx/aesp8-ppc.h | 4 + drivers/crypto/vmx/aesp8-ppc.pl | 1865 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 1867 insertions(+), 2 deletions(-) (limited to 'drivers/crypto') diff --git a/drivers/crypto/vmx/aesp8-ppc.h b/drivers/crypto/vmx/aesp8-ppc.h index 4cd34ee54a94..01972e16a6c0 100644 --- a/drivers/crypto/vmx/aesp8-ppc.h +++ b/drivers/crypto/vmx/aesp8-ppc.h @@ -19,3 +19,7 @@ void aes_p8_cbc_encrypt(const u8 *in, u8 *out, size_t len, void aes_p8_ctr32_encrypt_blocks(const u8 *in, u8 *out, size_t len, const struct aes_key *key, const u8 *iv); +void aes_p8_xts_encrypt(const u8 *in, u8 *out, size_t len, + const struct aes_key *key1, const struct aes_key *key2, u8 *iv); +void aes_p8_xts_decrypt(const u8 *in, u8 *out, size_t len, + const struct aes_key *key1, const struct aes_key *key2, u8 *iv); diff --git a/drivers/crypto/vmx/aesp8-ppc.pl b/drivers/crypto/vmx/aesp8-ppc.pl index 228053921b3f..813ffcc4d17c 100644 --- a/drivers/crypto/vmx/aesp8-ppc.pl +++ b/drivers/crypto/vmx/aesp8-ppc.pl @@ -1,4 +1,11 @@ -#!/usr/bin/env perl +#! /usr/bin/env perl +# Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the OpenSSL license (the "License"). You may not use +# this file except in compliance with the License. You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + # # ==================================================================== # Written by Andy Polyakov for the OpenSSL @@ -20,6 +27,19 @@ # instructions are interleaved. It's reckoned that eventual # misalignment penalties at page boundaries are in average lower # than additional overhead in pure AltiVec approach. +# +# May 2016 +# +# Add XTS subroutine, 9x on little- and 12x improvement on big-endian +# systems were measured. +# +###################################################################### +# Current large-block performance in cycles per byte processed with +# 128-bit key (less is better). +# +# CBC en-/decrypt CTR XTS +# POWER8[le] 3.96/0.72 0.74 1.1 +# POWER8[be] 3.75/0.65 0.66 1.0 $flavour = shift; @@ -1875,6 +1895,1847 @@ Lctr32_enc8x_done: ___ }} }}} +######################################################################### +{{{ # XTS procedures # +# int aes_p8_xts_[en|de]crypt(const char *inp, char *out, size_t len, # +# const AES_KEY *key1, const AES_KEY *key2, # +# [const] unsigned char iv[16]); # +# If $key2 is NULL, then a "tweak chaining" mode is engaged, in which # +# input tweak value is assumed to be encrypted already, and last tweak # +# value, one suitable for consecutive call on same chunk of data, is # +# written back to original buffer. In addition, in "tweak chaining" # +# mode only complete input blocks are processed. # + +my ($inp,$out,$len,$key1,$key2,$ivp,$rounds,$idx) = map("r$_",(3..10)); +my ($rndkey0,$rndkey1,$inout) = map("v$_",(0..2)); +my ($output,$inptail,$inpperm,$leperm,$keyperm) = map("v$_",(3..7)); +my ($tweak,$seven,$eighty7,$tmp,$tweak1) = map("v$_",(8..12)); +my $taillen = $key2; + + ($inp,$idx) = ($idx,$inp); # reassign + +$code.=<<___; +.globl .${prefix}_xts_encrypt + mr $inp,r3 # reassign + li r3,-1 + ${UCMP}i $len,16 + bltlr- + + lis r0,0xfff0 + mfspr r12,256 # save vrsave + li r11,0 + mtspr 256,r0 + + vspltisb $seven,0x07 # 0x070707..07 + le?lvsl $leperm,r11,r11 + le?vspltisb $tmp,0x0f + le?vxor $leperm,$leperm,$seven + + li $idx,15 + lvx $tweak,0,$ivp # load [unaligned] iv + lvsl $inpperm,0,$ivp + lvx $inptail,$idx,$ivp + le?vxor $inpperm,$inpperm,$tmp + vperm $tweak,$tweak,$inptail,$inpperm + + neg r11,$inp + lvsr $inpperm,0,r11 # prepare for unaligned load + lvx $inout,0,$inp + addi $inp,$inp,15 # 15 is not typo + le?vxor $inpperm,$inpperm,$tmp + + ${UCMP}i $key2,0 # key2==NULL? + beq Lxts_enc_no_key2 + + ?lvsl $keyperm,0,$key2 # prepare for unaligned key + lwz $rounds,240($key2) + srwi $rounds,$rounds,1 + subi $rounds,$rounds,1 + li $idx,16 + + lvx $rndkey0,0,$key2 + lvx $rndkey1,$idx,$key2 + addi $idx,$idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vxor $tweak,$tweak,$rndkey0 + lvx $rndkey0,$idx,$key2 + addi $idx,$idx,16 + mtctr $rounds + +Ltweak_xts_enc: + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vcipher $tweak,$tweak,$rndkey1 + lvx $rndkey1,$idx,$key2 + addi $idx,$idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vcipher $tweak,$tweak,$rndkey0 + lvx $rndkey0,$idx,$key2 + addi $idx,$idx,16 + bdnz Ltweak_xts_enc + + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vcipher $tweak,$tweak,$rndkey1 + lvx $rndkey1,$idx,$key2 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vcipherlast $tweak,$tweak,$rndkey0 + + li $ivp,0 # don't chain the tweak + b Lxts_enc + +Lxts_enc_no_key2: + li $idx,-16 + and $len,$len,$idx # in "tweak chaining" + # mode only complete + # blocks are processed +Lxts_enc: + lvx $inptail,0,$inp + addi $inp,$inp,16 + + ?lvsl $keyperm,0,$key1 # prepare for unaligned key + lwz $rounds,240($key1) + srwi $rounds,$rounds,1 + subi $rounds,$rounds,1 + li $idx,16 + + vslb $eighty7,$seven,$seven # 0x808080..80 + vor $eighty7,$eighty7,$seven # 0x878787..87 + vspltisb $tmp,1 # 0x010101..01 + vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01 + + ${UCMP}i $len,96 + bge _aesp8_xts_encrypt6x + + andi. $taillen,$len,15 + subic r0,$len,32 + subi $taillen,$taillen,16 + subfe r0,r0,r0 + and r0,r0,$taillen + add $inp,$inp,r0 + + lvx $rndkey0,0,$key1 + lvx $rndkey1,$idx,$key1 + addi $idx,$idx,16 + vperm $inout,$inout,$inptail,$inpperm + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vxor $inout,$inout,$tweak + vxor $inout,$inout,$rndkey0 + lvx $rndkey0,$idx,$key1 + addi $idx,$idx,16 + mtctr $rounds + b Loop_xts_enc + +.align 5 +Loop_xts_enc: + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vcipher $inout,$inout,$rndkey1 + lvx $rndkey1,$idx,$key1 + addi $idx,$idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vcipher $inout,$inout,$rndkey0 + lvx $rndkey0,$idx,$key1 + addi $idx,$idx,16 + bdnz Loop_xts_enc + + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vcipher $inout,$inout,$rndkey1 + lvx $rndkey1,$idx,$key1 + li $idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vxor $rndkey0,$rndkey0,$tweak + vcipherlast $output,$inout,$rndkey0 + + le?vperm $tmp,$output,$output,$leperm + be?nop + le?stvx_u $tmp,0,$out + be?stvx_u $output,0,$out + addi $out,$out,16 + + subic. $len,$len,16 + beq Lxts_enc_done + + vmr $inout,$inptail + lvx $inptail,0,$inp + addi $inp,$inp,16 + lvx $rndkey0,0,$key1 + lvx $rndkey1,$idx,$key1 + addi $idx,$idx,16 + + subic r0,$len,32 + subfe r0,r0,r0 + and r0,r0,$taillen + add $inp,$inp,r0 + + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vand $tmp,$tmp,$eighty7 + vxor $tweak,$tweak,$tmp + + vperm $inout,$inout,$inptail,$inpperm + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vxor $inout,$inout,$tweak + vxor $output,$output,$rndkey0 # just in case $len<16 + vxor $inout,$inout,$rndkey0 + lvx $rndkey0,$idx,$key1 + addi $idx,$idx,16 + + mtctr $rounds + ${UCMP}i $len,16 + bge Loop_xts_enc + + vxor $output,$output,$tweak + lvsr $inpperm,0,$len # $inpperm is no longer needed + vxor $inptail,$inptail,$inptail # $inptail is no longer needed + vspltisb $tmp,-1 + vperm $inptail,$inptail,$tmp,$inpperm + vsel $inout,$inout,$output,$inptail + + subi r11,$out,17 + subi $out,$out,16 + mtctr $len + li $len,16 +Loop_xts_enc_steal: + lbzu r0,1(r11) + stb r0,16(r11) + bdnz Loop_xts_enc_steal + + mtctr $rounds + b Loop_xts_enc # one more time... + +Lxts_enc_done: + ${UCMP}i $ivp,0 + beq Lxts_enc_ret + + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vand $tmp,$tmp,$eighty7 + vxor $tweak,$tweak,$tmp + + le?vperm $tweak,$tweak,$tweak,$leperm + stvx_u $tweak,0,$ivp + +Lxts_enc_ret: + mtspr 256,r12 # restore vrsave + li r3,0 + blr + .long 0 + .byte 0,12,0x04,0,0x80,6,6,0 + .long 0 +.size .${prefix}_xts_encrypt,.-.${prefix}_xts_encrypt + +.globl .${prefix}_xts_decrypt +.align 5 +.${prefix}_xts_decrypt: + mr $inp,r3 # reassign + li r3,-1 + ${UCMP}i $len,16 + bltlr- + + lis r0,0xfff8 + mfspr r12,256 # save vrsave + li r11,0 + mtspr 256,r0 + + andi. r0,$len,15 + neg r0,r0 + andi. r0,r0,16 + sub $len,$len,r0 + + vspltisb $seven,0x07 # 0x070707..07 + le?lvsl $leperm,r11,r11 + le?vspltisb $tmp,0x0f + le?vxor $leperm,$leperm,$seven + + li $idx,15 + lvx $tweak,0,$ivp # load [unaligned] iv + lvsl $inpperm,0,$ivp + lvx $inptail,$idx,$ivp + le?vxor $inpperm,$inpperm,$tmp + vperm $tweak,$tweak,$inptail,$inpperm + + neg r11,$inp + lvsr $inpperm,0,r11 # prepare for unaligned load + lvx $inout,0,$inp + addi $inp,$inp,15 # 15 is not typo + le?vxor $inpperm,$inpperm,$tmp + + ${UCMP}i $key2,0 # key2==NULL? + beq Lxts_dec_no_key2 + + ?lvsl $keyperm,0,$key2 # prepare for unaligned key + lwz $rounds,240($key2) + srwi $rounds,$rounds,1 + subi $rounds,$rounds,1 + li $idx,16 + + lvx $rndkey0,0,$key2 + lvx $rndkey1,$idx,$key2 + addi $idx,$idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vxor $tweak,$tweak,$rndkey0 + lvx $rndkey0,$idx,$key2 + addi $idx,$idx,16 + mtctr $rounds + +Ltweak_xts_dec: + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vcipher $tweak,$tweak,$rndkey1 + lvx $rndkey1,$idx,$key2 + addi $idx,$idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vcipher $tweak,$tweak,$rndkey0 + lvx $rndkey0,$idx,$key2 + addi $idx,$idx,16 + bdnz Ltweak_xts_dec + + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vcipher $tweak,$tweak,$rndkey1 + lvx $rndkey1,$idx,$key2 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vcipherlast $tweak,$tweak,$rndkey0 + + li $ivp,0 # don't chain the tweak + b Lxts_dec + +Lxts_dec_no_key2: + neg $idx,$len + andi. $idx,$idx,15 + add $len,$len,$idx # in "tweak chaining" + # mode only complete + # blocks are processed +Lxts_dec: + lvx $inptail,0,$inp + addi $inp,$inp,16 + + ?lvsl $keyperm,0,$key1 # prepare for unaligned key + lwz $rounds,240($key1) + srwi $rounds,$rounds,1 + subi $rounds,$rounds,1 + li $idx,16 + + vslb $eighty7,$seven,$seven # 0x808080..80 + vor $eighty7,$eighty7,$seven # 0x878787..87 + vspltisb $tmp,1 # 0x010101..01 + vsldoi $eighty7,$eighty7,$tmp,15 # 0x870101..01 + + ${UCMP}i $len,96 + bge _aesp8_xts_decrypt6x + + lvx $rndkey0,0,$key1 + lvx $rndkey1,$idx,$key1 + addi $idx,$idx,16 + vperm $inout,$inout,$inptail,$inpperm + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vxor $inout,$inout,$tweak + vxor $inout,$inout,$rndkey0 + lvx $rndkey0,$idx,$key1 + addi $idx,$idx,16 + mtctr $rounds + + ${UCMP}i $len,16 + blt Ltail_xts_dec + be?b Loop_xts_dec + +.align 5 +Loop_xts_dec: + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vncipher $inout,$inout,$rndkey1 + lvx $rndkey1,$idx,$key1 + addi $idx,$idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vncipher $inout,$inout,$rndkey0 + lvx $rndkey0,$idx,$key1 + addi $idx,$idx,16 + bdnz Loop_xts_dec + + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vncipher $inout,$inout,$rndkey1 + lvx $rndkey1,$idx,$key1 + li $idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vxor $rndkey0,$rndkey0,$tweak + vncipherlast $output,$inout,$rndkey0 + + le?vperm $tmp,$output,$output,$leperm + be?nop + le?stvx_u $tmp,0,$out + be?stvx_u $output,0,$out + addi $out,$out,16 + + subic. $len,$len,16 + beq Lxts_dec_done + + vmr $inout,$inptail + lvx $inptail,0,$inp + addi $inp,$inp,16 + lvx $rndkey0,0,$key1 + lvx $rndkey1,$idx,$key1 + addi $idx,$idx,16 + + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vand $tmp,$tmp,$eighty7 + vxor $tweak,$tweak,$tmp + + vperm $inout,$inout,$inptail,$inpperm + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vxor $inout,$inout,$tweak + vxor $inout,$inout,$rndkey0 + lvx $rndkey0,$idx,$key1 + addi $idx,$idx,16 + + mtctr $rounds + ${UCMP}i $len,16 + bge Loop_xts_dec + +Ltail_xts_dec: + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak1,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vand $tmp,$tmp,$eighty7 + vxor $tweak1,$tweak1,$tmp + + subi $inp,$inp,16 + add $inp,$inp,$len + + vxor $inout,$inout,$tweak # :-( + vxor $inout,$inout,$tweak1 # :-) + +Loop_xts_dec_short: + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vncipher $inout,$inout,$rndkey1 + lvx $rndkey1,$idx,$key1 + addi $idx,$idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vncipher $inout,$inout,$rndkey0 + lvx $rndkey0,$idx,$key1 + addi $idx,$idx,16 + bdnz Loop_xts_dec_short + + ?vperm $rndkey1,$rndkey1,$rndkey0,$keyperm + vncipher $inout,$inout,$rndkey1 + lvx $rndkey1,$idx,$key1 + li $idx,16 + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + vxor $rndkey0,$rndkey0,$tweak1 + vncipherlast $output,$inout,$rndkey0 + + le?vperm $tmp,$output,$output,$leperm + be?nop + le?stvx_u $tmp,0,$out + be?stvx_u $output,0,$out + + vmr $inout,$inptail + lvx $inptail,0,$inp + #addi $inp,$inp,16 + lvx $rndkey0,0,$key1 + lvx $rndkey1,$idx,$key1 + addi $idx,$idx,16 + vperm $inout,$inout,$inptail,$inpperm + ?vperm $rndkey0,$rndkey0,$rndkey1,$keyperm + + lvsr $inpperm,0,$len # $inpperm is no longer needed + vxor $inptail,$inptail,$inptail # $inptail is no longer needed + vspltisb $tmp,-1 + vperm $inptail,$inptail,$tmp,$inpperm + vsel $inout,$inout,$output,$inptail + + vxor $rndkey0,$rndkey0,$tweak + vxor $inout,$inout,$rndkey0 + lvx $rndkey0,$idx,$key1 + addi $idx,$idx,16 + + subi r11,$out,1 + mtctr $len + li $len,16 +Loop_xts_dec_steal: + lbzu r0,1(r11) + stb r0,16(r11) + bdnz Loop_xts_dec_steal + + mtctr $rounds + b Loop_xts_dec # one more time... + +Lxts_dec_done: + ${UCMP}i $ivp,0 + beq Lxts_dec_ret + + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vand $tmp,$tmp,$eighty7 + vxor $tweak,$tweak,$tmp + + le?vperm $tweak,$tweak,$tweak,$leperm + stvx_u $tweak,0,$ivp + +Lxts_dec_ret: + mtspr 256,r12 # restore vrsave + li r3,0 + blr + .long 0 + .byte 0,12,0x04,0,0x80,6,6,0 + .long 0 +.size .${prefix}_xts_decrypt,.-.${prefix}_xts_decrypt +___ +######################################################################### +{{ # Optimized XTS procedures # +my $key_=$key2; +my ($x00,$x10,$x20,$x30,$x40,$x50,$x60,$x70)=map("r$_",(0,3,26..31)); + $x00=0 if ($flavour =~ /osx/); +my ($in0, $in1, $in2, $in3, $in4, $in5 )=map("v$_",(0..5)); +my ($out0, $out1, $out2, $out3, $out4, $out5)=map("v$_",(7,12..16)); +my ($twk0, $twk1, $twk2, $twk3, $twk4, $twk5)=map("v$_",(17..22)); +my $rndkey0="v23"; # v24-v25 rotating buffer for first found keys + # v26-v31 last 6 round keys +my ($keyperm)=($out0); # aliases with "caller", redundant assignment +my $taillen=$x70; + +$code.=<<___; +.align 5 +_aesp8_xts_encrypt6x: + $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) + mflr r11 + li r7,`$FRAME+8*16+15` + li r3,`$FRAME+8*16+31` + $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp) + stvx v20,r7,$sp # ABI says so + addi r7,r7,32 + stvx v21,r3,$sp + addi r3,r3,32 + stvx v22,r7,$sp + addi r7,r7,32 + stvx v23,r3,$sp + addi r3,r3,32 + stvx v24,r7,$sp + addi r7,r7,32 + stvx v25,r3,$sp + addi r3,r3,32 + stvx v26,r7,$sp + addi r7,r7,32 + stvx v27,r3,$sp + addi r3,r3,32 + stvx v28,r7,$sp + addi r7,r7,32 + stvx v29,r3,$sp + addi r3,r3,32 + stvx v30,r7,$sp + stvx v31,r3,$sp + li r0,-1 + stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave + li $x10,0x10 + $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) + li $x20,0x20 + $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) + li $x30,0x30 + $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) + li $x40,0x40 + $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) + li $x50,0x50 + $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) + li $x60,0x60 + $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) + li $x70,0x70 + mtspr 256,r0 + + subi $rounds,$rounds,3 # -4 in total + + lvx $rndkey0,$x00,$key1 # load key schedule + lvx v30,$x10,$key1 + addi $key1,$key1,0x20 + lvx v31,$x00,$key1 + ?vperm $rndkey0,$rndkey0,v30,$keyperm + addi $key_,$sp,$FRAME+15 + mtctr $rounds + +Load_xts_enc_key: + ?vperm v24,v30,v31,$keyperm + lvx v30,$x10,$key1 + addi $key1,$key1,0x20 + stvx v24,$x00,$key_ # off-load round[1] + ?vperm v25,v31,v30,$keyperm + lvx v31,$x00,$key1 + stvx v25,$x10,$key_ # off-load round[2] + addi $key_,$key_,0x20 + bdnz Load_xts_enc_key + + lvx v26,$x10,$key1 + ?vperm v24,v30,v31,$keyperm + lvx v27,$x20,$key1 + stvx v24,$x00,$key_ # off-load round[3] + ?vperm v25,v31,v26,$keyperm + lvx v28,$x30,$key1 + stvx v25,$x10,$key_ # off-load round[4] + addi $key_,$sp,$FRAME+15 # rewind $key_ + ?vperm v26,v26,v27,$keyperm + lvx v29,$x40,$key1 + ?vperm v27,v27,v28,$keyperm + lvx v30,$x50,$key1 + ?vperm v28,v28,v29,$keyperm + lvx v31,$x60,$key1 + ?vperm v29,v29,v30,$keyperm + lvx $twk5,$x70,$key1 # borrow $twk5 + ?vperm v30,v30,v31,$keyperm + lvx v24,$x00,$key_ # pre-load round[1] + ?vperm v31,v31,$twk5,$keyperm + lvx v25,$x10,$key_ # pre-load round[2] + + vperm $in0,$inout,$inptail,$inpperm + subi $inp,$inp,31 # undo "caller" + vxor $twk0,$tweak,$rndkey0 + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vand $tmp,$tmp,$eighty7 + vxor $out0,$in0,$twk0 + vxor $tweak,$tweak,$tmp + + lvx_u $in1,$x10,$inp + vxor $twk1,$tweak,$rndkey0 + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + le?vperm $in1,$in1,$in1,$leperm + vand $tmp,$tmp,$eighty7 + vxor $out1,$in1,$twk1 + vxor $tweak,$tweak,$tmp + + lvx_u $in2,$x20,$inp + andi. $taillen,$len,15 + vxor $twk2,$tweak,$rndkey0 + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + le?vperm $in2,$in2,$in2,$leperm + vand $tmp,$tmp,$eighty7 + vxor $out2,$in2,$twk2 + vxor $tweak,$tweak,$tmp + + lvx_u $in3,$x30,$inp + sub $len,$len,$taillen + vxor $twk3,$tweak,$rndkey0 + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + le?vperm $in3,$in3,$in3,$leperm + vand $tmp,$tmp,$eighty7 + vxor $out3,$in3,$twk3 + vxor $tweak,$tweak,$tmp + + lvx_u $in4,$x40,$inp + subi $len,$len,0x60 + vxor $twk4,$tweak,$rndkey0 + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + le?vperm $in4,$in4,$in4,$leperm + vand $tmp,$tmp,$eighty7 + vxor $out4,$in4,$twk4 + vxor $tweak,$tweak,$tmp + + lvx_u $in5,$x50,$inp + addi $inp,$inp,0x60 + vxor $twk5,$tweak,$rndkey0 + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + le?vperm $in5,$in5,$in5,$leperm + vand $tmp,$tmp,$eighty7 + vxor $out5,$in5,$twk5 + vxor $tweak,$tweak,$tmp + + vxor v31,v31,$rndkey0 + mtctr $rounds + b Loop_xts_enc6x + +.align 5 +Loop_xts_enc6x: + vcipher $out0,$out0,v24 + vcipher $out1,$out1,v24 + vcipher $out2,$out2,v24 + vcipher $out3,$out3,v24 + vcipher $out4,$out4,v24 + vcipher $out5,$out5,v24 + lvx v24,$x20,$key_ # round[3] + addi $key_,$key_,0x20 + + vcipher $out0,$out0,v25 + vcipher $out1,$out1,v25 + vcipher $out2,$out2,v25 + vcipher $out3,$out3,v25 + vcipher $out4,$out4,v25 + vcipher $out5,$out5,v25 + lvx v25,$x10,$key_ # round[4] + bdnz Loop_xts_enc6x + + subic $len,$len,96 # $len-=96 + vxor $in0,$twk0,v31 # xor with last round key + vcipher $out0,$out0,v24 + vcipher $out1,$out1,v24 + vsrab $tmp,$tweak,$seven # next tweak value + vxor $twk0,$tweak,$rndkey0 + vaddubm $tweak,$tweak,$tweak + vcipher $out2,$out2,v24 + vcipher $out3,$out3,v24 + vsldoi $tmp,$tmp,$tmp,15 + vcipher $out4,$out4,v24 + vcipher $out5,$out5,v24 + + subfe. r0,r0,r0 # borrow?-1:0 + vand $tmp,$tmp,$eighty7 + vcipher $out0,$out0,v25 + vcipher $out1,$out1,v25 + vxor $tweak,$tweak,$tmp + vcipher $out2,$out2,v25 + vcipher $out3,$out3,v25 + vxor $in1,$twk1,v31 + vsrab $tmp,$tweak,$seven # next tweak value + vxor $twk1,$tweak,$rndkey0 + vcipher $out4,$out4,v25 + vcipher $out5,$out5,v25 + + and r0,r0,$len + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vcipher $out0,$out0,v26 + vcipher $out1,$out1,v26 + vand $tmp,$tmp,$eighty7 + vcipher $out2,$out2,v26 + vcipher $out3,$out3,v26 + vxor $tweak,$tweak,$tmp + vcipher $out4,$out4,v26 + vcipher $out5,$out5,v26 + + add $inp,$inp,r0 # $inp is adjusted in such + # way that at exit from the + # loop inX-in5 are loaded + # with last "words" + vxor $in2,$twk2,v31 + vsrab $tmp,$tweak,$seven # next tweak value + vxor $twk2,$tweak,$rndkey0 + vaddubm $tweak,$tweak,$tweak + vcipher $out0,$out0,v27 + vcipher $out1,$out1,v27 + vsldoi $tmp,$tmp,$tmp,15 + vcipher $out2,$out2,v27 + vcipher $out3,$out3,v27 + vand $tmp,$tmp,$eighty7 + vcipher $out4,$out4,v27 + vcipher $out5,$out5,v27 + + addi $key_,$sp,$FRAME+15 # rewind $key_ + vxor $tweak,$tweak,$tmp + vcipher $out0,$out0,v28 + vcipher $out1,$out1,v28 + vxor $in3,$twk3,v31 + vsrab $tmp,$tweak,$seven # next tweak value + vxor $twk3,$tweak,$rndkey0 + vcipher $out2,$out2,v28 + vcipher $out3,$out3,v28 + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vcipher $out4,$out4,v28 + vcipher $out5,$out5,v28 + lvx v24,$x00,$key_ # re-pre-load round[1] + vand $tmp,$tmp,$eighty7 + + vcipher $out0,$out0,v29 + vcipher $out1,$out1,v29 + vxor $tweak,$tweak,$tmp + vcipher $out2,$out2,v29 + vcipher $out3,$out3,v29 + vxor $in4,$twk4,v31 + vsrab $tmp,$tweak,$seven # next tweak value + vxor $twk4,$tweak,$rndkey0 + vcipher $out4,$out4,v29 + vcipher $out5,$out5,v29 + lvx v25,$x10,$key_ # re-pre-load round[2] + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + + vcipher $out0,$out0,v30 + vcipher $out1,$out1,v30 + vand $tmp,$tmp,$eighty7 + vcipher $out2,$out2,v30 + vcipher $out3,$out3,v30 + vxor $tweak,$tweak,$tmp + vcipher $out4,$out4,v30 + vcipher $out5,$out5,v30 + vxor $in5,$twk5,v31 + vsrab $tmp,$tweak,$seven # next tweak value + vxor $twk5,$tweak,$rndkey0 + + vcipherlast $out0,$out0,$in0 + lvx_u $in0,$x00,$inp # load next input block + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vcipherlast $out1,$out1,$in1 + lvx_u $in1,$x10,$inp + vcipherlast $out2,$out2,$in2 + le?vperm $in0,$in0,$in0,$leperm + lvx_u $in2,$x20,$inp + vand $tmp,$tmp,$eighty7 + vcipherlast $out3,$out3,$in3 + le?vperm $in1,$in1,$in1,$leperm + lvx_u $in3,$x30,$inp + vcipherlast $out4,$out4,$in4 + le?vperm $in2,$in2,$in2,$leperm + lvx_u $in4,$x40,$inp + vxor $tweak,$tweak,$tmp + vcipherlast $tmp,$out5,$in5 # last block might be needed + # in stealing mode + le?vperm $in3,$in3,$in3,$leperm + lvx_u $in5,$x50,$inp + addi $inp,$inp,0x60 + le?vperm $in4,$in4,$in4,$leperm + le?vperm $in5,$in5,$in5,$leperm + + le?vperm $out0,$out0,$out0,$leperm + le?vperm $out1,$out1,$out1,$leperm + stvx_u $out0,$x00,$out # store output + vxor $out0,$in0,$twk0 + le?vperm $out2,$out2,$out2,$leperm + stvx_u $out1,$x10,$out + vxor $out1,$in1,$twk1 + le?vperm $out3,$out3,$out3,$leperm + stvx_u $out2,$x20,$out + vxor $out2,$in2,$twk2 + le?vperm $out4,$out4,$out4,$leperm + stvx_u $out3,$x30,$out + vxor $out3,$in3,$twk3 + le?vperm $out5,$tmp,$tmp,$leperm + stvx_u $out4,$x40,$out + vxor $out4,$in4,$twk4 + le?stvx_u $out5,$x50,$out + be?stvx_u $tmp, $x50,$out + vxor $out5,$in5,$twk5 + addi $out,$out,0x60 + + mtctr $rounds + beq Loop_xts_enc6x # did $len-=96 borrow? + + addic. $len,$len,0x60 + beq Lxts_enc6x_zero + cmpwi $len,0x20 + blt Lxts_enc6x_one + nop + beq Lxts_enc6x_two + cmpwi $len,0x40 + blt Lxts_enc6x_three + nop + beq Lxts_enc6x_four + +Lxts_enc6x_five: + vxor $out0,$in1,$twk0 + vxor $out1,$in2,$twk1 + vxor $out2,$in3,$twk2 + vxor $out3,$in4,$twk3 + vxor $out4,$in5,$twk4 + + bl _aesp8_xts_enc5x + + le?vperm $out0,$out0,$out0,$leperm + vmr $twk0,$twk5 # unused tweak + le?vperm $out1,$out1,$out1,$leperm + stvx_u $out0,$x00,$out # store output + le?vperm $out2,$out2,$out2,$leperm + stvx_u $out1,$x10,$out + le?vperm $out3,$out3,$out3,$leperm + stvx_u $out2,$x20,$out + vxor $tmp,$out4,$twk5 # last block prep for stealing + le?vperm $out4,$out4,$out4,$leperm + stvx_u $out3,$x30,$out + stvx_u $out4,$x40,$out + addi $out,$out,0x50 + bne Lxts_enc6x_steal + b Lxts_enc6x_done + +.align 4 +Lxts_enc6x_four: + vxor $out0,$in2,$twk0 + vxor $out1,$in3,$twk1 + vxor $out2,$in4,$twk2 + vxor $out3,$in5,$twk3 + vxor $out4,$out4,$out4 + + bl _aesp8_xts_enc5x + + le?vperm $out0,$out0,$out0,$leperm + vmr $twk0,$twk4 # unused tweak + le?vperm $out1,$out1,$out1,$leperm + stvx_u $out0,$x00,$out # store output + le?vperm $out2,$out2,$out2,$leperm + stvx_u $out1,$x10,$out + vxor $tmp,$out3,$twk4 # last block prep for stealing + le?vperm $out3,$out3,$out3,$leperm + stvx_u $out2,$x20,$out + stvx_u $out3,$x30,$out + addi $out,$out,0x40 + bne Lxts_enc6x_steal + b Lxts_enc6x_done + +.align 4 +Lxts_enc6x_three: + vxor $out0,$in3,$twk0 + vxor $out1,$in4,$twk1 + vxor $out2,$in5,$twk2 + vxor $out3,$out3,$out3 + vxor $out4,$out4,$out4 + + bl _aesp8_xts_enc5x + + le?vperm $out0,$out0,$out0,$leperm + vmr $twk0,$twk3 # unused tweak + le?vperm $out1,$out1,$out1,$leperm + stvx_u $out0,$x00,$out # store output + vxor $tmp,$out2,$twk3 # last block prep for stealing + le?vperm $out2,$out2,$out2,$leperm + stvx_u $out1,$x10,$out + stvx_u $out2,$x20,$out + addi $out,$out,0x30 + bne Lxts_enc6x_steal + b Lxts_enc6x_done + +.align 4 +Lxts_enc6x_two: + vxor $out0,$in4,$twk0 + vxor $out1,$in5,$twk1 + vxor $out2,$out2,$out2 + vxor $out3,$out3,$out3 + vxor $out4,$out4,$out4 + + bl _aesp8_xts_enc5x + + le?vperm $out0,$out0,$out0,$leperm + vmr $twk0,$twk2 # unused tweak + vxor $tmp,$out1,$twk2 # last block prep for stealing + le?vperm $out1,$out1,$out1,$leperm + stvx_u $out0,$x00,$out # store output + stvx_u $out1,$x10,$out + addi $out,$out,0x20 + bne Lxts_enc6x_steal + b Lxts_enc6x_done + +.align 4 +Lxts_enc6x_one: + vxor $out0,$in5,$twk0 + nop +Loop_xts_enc1x: + vcipher $out0,$out0,v24 + lvx v24,$x20,$key_ # round[3] + addi $key_,$key_,0x20 + + vcipher $out0,$out0,v25 + lvx v25,$x10,$key_ # round[4] + bdnz Loop_xts_enc1x + + add $inp,$inp,$taillen + cmpwi $taillen,0 + vcipher $out0,$out0,v24 + + subi $inp,$inp,16 + vcipher $out0,$out0,v25 + + lvsr $inpperm,0,$taillen + vcipher $out0,$out0,v26 + + lvx_u $in0,0,$inp + vcipher $out0,$out0,v27 + + addi $key_,$sp,$FRAME+15 # rewind $key_ + vcipher $out0,$out0,v28 + lvx v24,$x00,$key_ # re-pre-load round[1] + + vcipher $out0,$out0,v29 + lvx v25,$x10,$key_ # re-pre-load round[2] + vxor $twk0,$twk0,v31 + + le?vperm $in0,$in0,$in0,$leperm + vcipher $out0,$out0,v30 + + vperm $in0,$in0,$in0,$inpperm + vcipherlast $out0,$out0,$twk0 + + vmr $twk0,$twk1 # unused tweak + vxor $tmp,$out0,$twk1 # last block prep for stealing + le?vperm $out0,$out0,$out0,$leperm + stvx_u $out0,$x00,$out # store output + addi $out,$out,0x10 + bne Lxts_enc6x_steal + b Lxts_enc6x_done + +.align 4 +Lxts_enc6x_zero: + cmpwi $taillen,0 + beq Lxts_enc6x_done + + add $inp,$inp,$taillen + subi $inp,$inp,16 + lvx_u $in0,0,$inp + lvsr $inpperm,0,$taillen # $in5 is no more + le?vperm $in0,$in0,$in0,$leperm + vperm $in0,$in0,$in0,$inpperm + vxor $tmp,$tmp,$twk0 +Lxts_enc6x_steal: + vxor $in0,$in0,$twk0 + vxor $out0,$out0,$out0 + vspltisb $out1,-1 + vperm $out0,$out0,$out1,$inpperm + vsel $out0,$in0,$tmp,$out0 # $tmp is last block, remember? + + subi r30,$out,17 + subi $out,$out,16 + mtctr $taillen +Loop_xts_enc6x_steal: + lbzu r0,1(r30) + stb r0,16(r30) + bdnz Loop_xts_enc6x_steal + + li $taillen,0 + mtctr $rounds + b Loop_xts_enc1x # one more time... + +.align 4 +Lxts_enc6x_done: + ${UCMP}i $ivp,0 + beq Lxts_enc6x_ret + + vxor $tweak,$twk0,$rndkey0 + le?vperm $tweak,$tweak,$tweak,$leperm + stvx_u $tweak,0,$ivp + +Lxts_enc6x_ret: + mtlr r11 + li r10,`$FRAME+15` + li r11,`$FRAME+31` + stvx $seven,r10,$sp # wipe copies of round keys + addi r10,r10,32 + stvx $seven,r11,$sp + addi r11,r11,32 + stvx $seven,r10,$sp + addi r10,r10,32 + stvx $seven,r11,$sp + addi r11,r11,32 + stvx $seven,r10,$sp + addi r10,r10,32 + stvx $seven,r11,$sp + addi r11,r11,32 + stvx $seven,r10,$sp + addi r10,r10,32 + stvx $seven,r11,$sp + addi r11,r11,32 + + mtspr 256,$vrsave + lvx v20,r10,$sp # ABI says so + addi r10,r10,32 + lvx v21,r11,$sp + addi r11,r11,32 + lvx v22,r10,$sp + addi r10,r10,32 + lvx v23,r11,$sp + addi r11,r11,32 + lvx v24,r10,$sp + addi r10,r10,32 + lvx v25,r11,$sp + addi r11,r11,32 + lvx v26,r10,$sp + addi r10,r10,32 + lvx v27,r11,$sp + addi r11,r11,32 + lvx v28,r10,$sp + addi r10,r10,32 + lvx v29,r11,$sp + addi r11,r11,32 + lvx v30,r10,$sp + lvx v31,r11,$sp + $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) + $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) + $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) + $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) + $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) + $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) + addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` + blr + .long 0 + .byte 0,12,0x04,1,0x80,6,6,0 + .long 0 + +.align 5 +_aesp8_xts_enc5x: + vcipher $out0,$out0,v24 + vcipher $out1,$out1,v24 + vcipher $out2,$out2,v24 + vcipher $out3,$out3,v24 + vcipher $out4,$out4,v24 + lvx v24,$x20,$key_ # round[3] + addi $key_,$key_,0x20 + + vcipher $out0,$out0,v25 + vcipher $out1,$out1,v25 + vcipher $out2,$out2,v25 + vcipher $out3,$out3,v25 + vcipher $out4,$out4,v25 + lvx v25,$x10,$key_ # round[4] + bdnz _aesp8_xts_enc5x + + add $inp,$inp,$taillen + cmpwi $taillen,0 + vcipher $out0,$out0,v24 + vcipher $out1,$out1,v24 + vcipher $out2,$out2,v24 + vcipher $out3,$out3,v24 + vcipher $out4,$out4,v24 + + subi $inp,$inp,16 + vcipher $out0,$out0,v25 + vcipher $out1,$out1,v25 + vcipher $out2,$out2,v25 + vcipher $out3,$out3,v25 + vcipher $out4,$out4,v25 + vxor $twk0,$twk0,v31 + + vcipher $out0,$out0,v26 + lvsr $inpperm,r0,$taillen # $in5 is no more + vcipher $out1,$out1,v26 + vcipher $out2,$out2,v26 + vcipher $out3,$out3,v26 + vcipher $out4,$out4,v26 + vxor $in1,$twk1,v31 + + vcipher $out0,$out0,v27 + lvx_u $in0,0,$inp + vcipher $out1,$out1,v27 + vcipher $out2,$out2,v27 + vcipher $out3,$out3,v27 + vcipher $out4,$out4,v27 + vxor $in2,$twk2,v31 + + addi $key_,$sp,$FRAME+15 # rewind $key_ + vcipher $out0,$out0,v28 + vcipher $out1,$out1,v28 + vcipher $out2,$out2,v28 + vcipher $out3,$out3,v28 + vcipher $out4,$out4,v28 + lvx v24,$x00,$key_ # re-pre-load round[1] + vxor $in3,$twk3,v31 + + vcipher $out0,$out0,v29 + le?vperm $in0,$in0,$in0,$leperm + vcipher $out1,$out1,v29 + vcipher $out2,$out2,v29 + vcipher $out3,$out3,v29 + vcipher $out4,$out4,v29 + lvx v25,$x10,$key_ # re-pre-load round[2] + vxor $in4,$twk4,v31 + + vcipher $out0,$out0,v30 + vperm $in0,$in0,$in0,$inpperm + vcipher $out1,$out1,v30 + vcipher $out2,$out2,v30 + vcipher $out3,$out3,v30 + vcipher $out4,$out4,v30 + + vcipherlast $out0,$out0,$twk0 + vcipherlast $out1,$out1,$in1 + vcipherlast $out2,$out2,$in2 + vcipherlast $out3,$out3,$in3 + vcipherlast $out4,$out4,$in4 + blr + .long 0 + .byte 0,12,0x14,0,0,0,0,0 + +.align 5 +_aesp8_xts_decrypt6x: + $STU $sp,-`($FRAME+21*16+6*$SIZE_T)`($sp) + mflr r11 + li r7,`$FRAME+8*16+15` + li r3,`$FRAME+8*16+31` + $PUSH r11,`$FRAME+21*16+6*$SIZE_T+$LRSAVE`($sp) + stvx v20,r7,$sp # ABI says so + addi r7,r7,32 + stvx v21,r3,$sp + addi r3,r3,32 + stvx v22,r7,$sp + addi r7,r7,32 + stvx v23,r3,$sp + addi r3,r3,32 + stvx v24,r7,$sp + addi r7,r7,32 + stvx v25,r3,$sp + addi r3,r3,32 + stvx v26,r7,$sp + addi r7,r7,32 + stvx v27,r3,$sp + addi r3,r3,32 + stvx v28,r7,$sp + addi r7,r7,32 + stvx v29,r3,$sp + addi r3,r3,32 + stvx v30,r7,$sp + stvx v31,r3,$sp + li r0,-1 + stw $vrsave,`$FRAME+21*16-4`($sp) # save vrsave + li $x10,0x10 + $PUSH r26,`$FRAME+21*16+0*$SIZE_T`($sp) + li $x20,0x20 + $PUSH r27,`$FRAME+21*16+1*$SIZE_T`($sp) + li $x30,0x30 + $PUSH r28,`$FRAME+21*16+2*$SIZE_T`($sp) + li $x40,0x40 + $PUSH r29,`$FRAME+21*16+3*$SIZE_T`($sp) + li $x50,0x50 + $PUSH r30,`$FRAME+21*16+4*$SIZE_T`($sp) + li $x60,0x60 + $PUSH r31,`$FRAME+21*16+5*$SIZE_T`($sp) + li $x70,0x70 + mtspr 256,r0 + + subi $rounds,$rounds,3 # -4 in total + + lvx $rndkey0,$x00,$key1 # load key schedule + lvx v30,$x10,$key1 + addi $key1,$key1,0x20 + lvx v31,$x00,$key1 + ?vperm $rndkey0,$rndkey0,v30,$keyperm + addi $key_,$sp,$FRAME+15 + mtctr $rounds + +Load_xts_dec_key: + ?vperm v24,v30,v31,$keyperm + lvx v30,$x10,$key1 + addi $key1,$key1,0x20 + stvx v24,$x00,$key_ # off-load round[1] + ?vperm v25,v31,v30,$keyperm + lvx v31,$x00,$key1 + stvx v25,$x10,$key_ # off-load round[2] + addi $key_,$key_,0x20 + bdnz Load_xts_dec_key + + lvx v26,$x10,$key1 + ?vperm v24,v30,v31,$keyperm + lvx v27,$x20,$key1 + stvx v24,$x00,$key_ # off-load round[3] + ?vperm v25,v31,v26,$keyperm + lvx v28,$x30,$key1 + stvx v25,$x10,$key_ # off-load round[4] + addi $key_,$sp,$FRAME+15 # rewind $key_ + ?vperm v26,v26,v27,$keyperm + lvx v29,$x40,$key1 + ?vperm v27,v27,v28,$keyperm + lvx v30,$x50,$key1 + ?vperm v28,v28,v29,$keyperm + lvx v31,$x60,$key1 + ?vperm v29,v29,v30,$keyperm + lvx $twk5,$x70,$key1 # borrow $twk5 + ?vperm v30,v30,v31,$keyperm + lvx v24,$x00,$key_ # pre-load round[1] + ?vperm v31,v31,$twk5,$keyperm + lvx v25,$x10,$key_ # pre-load round[2] + + vperm $in0,$inout,$inptail,$inpperm + subi $inp,$inp,31 # undo "caller" + vxor $twk0,$tweak,$rndkey0 + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vand $tmp,$tmp,$eighty7 + vxor $out0,$in0,$twk0 + vxor $tweak,$tweak,$tmp + + lvx_u $in1,$x10,$inp + vxor $twk1,$tweak,$rndkey0 + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + le?vperm $in1,$in1,$in1,$leperm + vand $tmp,$tmp,$eighty7 + vxor $out1,$in1,$twk1 + vxor $tweak,$tweak,$tmp + + lvx_u $in2,$x20,$inp + andi. $taillen,$len,15 + vxor $twk2,$tweak,$rndkey0 + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + le?vperm $in2,$in2,$in2,$leperm + vand $tmp,$tmp,$eighty7 + vxor $out2,$in2,$twk2 + vxor $tweak,$tweak,$tmp + + lvx_u $in3,$x30,$inp + sub $len,$len,$taillen + vxor $twk3,$tweak,$rndkey0 + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + le?vperm $in3,$in3,$in3,$leperm + vand $tmp,$tmp,$eighty7 + vxor $out3,$in3,$twk3 + vxor $tweak,$tweak,$tmp + + lvx_u $in4,$x40,$inp + subi $len,$len,0x60 + vxor $twk4,$tweak,$rndkey0 + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + le?vperm $in4,$in4,$in4,$leperm + vand $tmp,$tmp,$eighty7 + vxor $out4,$in4,$twk4 + vxor $tweak,$tweak,$tmp + + lvx_u $in5,$x50,$inp + addi $inp,$inp,0x60 + vxor $twk5,$tweak,$rndkey0 + vsrab $tmp,$tweak,$seven # next tweak value + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + le?vperm $in5,$in5,$in5,$leperm + vand $tmp,$tmp,$eighty7 + vxor $out5,$in5,$twk5 + vxor $tweak,$tweak,$tmp + + vxor v31,v31,$rndkey0 + mtctr $rounds + b Loop_xts_dec6x + +.align 5 +Loop_xts_dec6x: + vncipher $out0,$out0,v24 + vncipher $out1,$out1,v24 + vncipher $out2,$out2,v24 + vncipher $out3,$out3,v24 + vncipher $out4,$out4,v24 + vncipher $out5,$out5,v24 + lvx v24,$x20,$key_ # round[3] + addi $key_,$key_,0x20 + + vncipher $out0,$out0,v25 + vncipher $out1,$out1,v25 + vncipher $out2,$out2,v25 + vncipher $out3,$out3,v25 + vncipher $out4,$out4,v25 + vncipher $out5,$out5,v25 + lvx v25,$x10,$key_ # round[4] + bdnz Loop_xts_dec6x + + subic $len,$len,96 # $len-=96 + vxor $in0,$twk0,v31 # xor with last round key + vncipher $out0,$out0,v24 + vncipher $out1,$out1,v24 + vsrab $tmp,$tweak,$seven # next tweak value + vxor $twk0,$tweak,$rndkey0 + vaddubm $tweak,$tweak,$tweak + vncipher $out2,$out2,v24 + vncipher $out3,$out3,v24 + vsldoi $tmp,$tmp,$tmp,15 + vncipher $out4,$out4,v24 + vncipher $out5,$out5,v24 + + subfe. r0,r0,r0 # borrow?-1:0 + vand $tmp,$tmp,$eighty7 + vncipher $out0,$out0,v25 + vncipher $out1,$out1,v25 + vxor $tweak,$tweak,$tmp + vncipher $out2,$out2,v25 + vncipher $out3,$out3,v25 + vxor $in1,$twk1,v31 + vsrab $tmp,$tweak,$seven # next tweak value + vxor $twk1,$tweak,$rndkey0 + vncipher $out4,$out4,v25 + vncipher $out5,$out5,v25 + + and r0,r0,$len + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vncipher $out0,$out0,v26 + vncipher $out1,$out1,v26 + vand $tmp,$tmp,$eighty7 + vncipher $out2,$out2,v26 + vncipher $out3,$out3,v26 + vxor $tweak,$tweak,$tmp + vncipher $out4,$out4,v26 + vncipher $out5,$out5,v26 + + add $inp,$inp,r0 # $inp is adjusted in such + # way that at exit from the + # loop inX-in5 are loaded + # with last "words" + vxor $in2,$twk2,v31 + vsrab $tmp,$tweak,$seven # next tweak value + vxor $twk2,$tweak,$rndkey0 + vaddubm $tweak,$tweak,$tweak + vncipher $out0,$out0,v27 + vncipher $out1,$out1,v27 + vsldoi $tmp,$tmp,$tmp,15 + vncipher $out2,$out2,v27 + vncipher $out3,$out3,v27 + vand $tmp,$tmp,$eighty7 + vncipher $out4,$out4,v27 + vncipher $out5,$out5,v27 + + addi $key_,$sp,$FRAME+15 # rewind $key_ + vxor $tweak,$tweak,$tmp + vncipher $out0,$out0,v28 + vncipher $out1,$out1,v28 + vxor $in3,$twk3,v31 + vsrab $tmp,$tweak,$seven # next tweak value + vxor $twk3,$tweak,$rndkey0 + vncipher $out2,$out2,v28 + vncipher $out3,$out3,v28 + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vncipher $out4,$out4,v28 + vncipher $out5,$out5,v28 + lvx v24,$x00,$key_ # re-pre-load round[1] + vand $tmp,$tmp,$eighty7 + + vncipher $out0,$out0,v29 + vncipher $out1,$out1,v29 + vxor $tweak,$tweak,$tmp + vncipher $out2,$out2,v29 + vncipher $out3,$out3,v29 + vxor $in4,$twk4,v31 + vsrab $tmp,$tweak,$seven # next tweak value + vxor $twk4,$tweak,$rndkey0 + vncipher $out4,$out4,v29 + vncipher $out5,$out5,v29 + lvx v25,$x10,$key_ # re-pre-load round[2] + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + + vncipher $out0,$out0,v30 + vncipher $out1,$out1,v30 + vand $tmp,$tmp,$eighty7 + vncipher $out2,$out2,v30 + vncipher $out3,$out3,v30 + vxor $tweak,$tweak,$tmp + vncipher $out4,$out4,v30 + vncipher $out5,$out5,v30 + vxor $in5,$twk5,v31 + vsrab $tmp,$tweak,$seven # next tweak value + vxor $twk5,$tweak,$rndkey0 + + vncipherlast $out0,$out0,$in0 + lvx_u $in0,$x00,$inp # load next input block + vaddubm $tweak,$tweak,$tweak + vsldoi $tmp,$tmp,$tmp,15 + vncipherlast $out1,$out1,$in1 + lvx_u $in1,$x10,$inp + vncipherlast $out2,$out2,$in2 + le?vperm $in0,$in0,$in0,$leperm + lvx_u $in2,$x20,$inp + vand $tmp,$tmp,$eighty7 + vncipherlast $out3,$out3,$in3 + le?vperm $in1,$in1,$in1,$leperm + lvx_u $in3,$x30,$inp + vncipherlast $out4,$out4,$in4 + le?vperm $in2,$in2,$in2,$leperm + lvx_u $in4,$x40,$inp + vxor $tweak,$tweak,$tmp + vncipherlast $out5,$out5,$in5 + le?vperm $in3,$in3,$in3,$leperm + lvx_u $in5,$x50,$inp + addi $inp,$inp,0x60 + le?vperm $in4,$in4,$in4,$leperm + le?vperm $in5,$in5,$in5,$leperm + + le?vperm $out0,$out0,$out0,$leperm + le?vperm $out1,$out1,$out1,$leperm + stvx_u $out0,$x00,$out # store output + vxor $out0,$in0,$twk0 + le?vperm $out2,$out2,$out2,$leperm + stvx_u $out1,$x10,$out + vxor $out1,$in1,$twk1 + le?vperm $out3,$out3,$out3,$leperm + stvx_u $out2,$x20,$out + vxor $out2,$in2,$twk2 + le?vperm $out4,$out4,$out4,$leperm + stvx_u $out3,$x30,$out + vxor $out3,$in3,$twk3 + le?vperm $out5,$out5,$out5,$leperm + stvx_u $out4,$x40,$out + vxor $out4,$in4,$twk4 + stvx_u $out5,$x50,$out + vxor $out5,$in5,$twk5 + addi $out,$out,0x60 + + mtctr $rounds + beq Loop_xts_dec6x # did $len-=96 borrow? + + addic. $len,$len,0x60 + beq Lxts_dec6x_zero + cmpwi $len,0x20 + blt Lxts_dec6x_one + nop + beq Lxts_dec6x_two + cmpwi $len,0x40 + blt Lxts_dec6x_three + nop + beq Lxts_dec6x_four + +Lxts_dec6x_five: + vxor $out0,$in1,$twk0 + vxor $out1,$in2,$twk1 + vxor $out2,$in3,$twk2 + vxor $out3,$in4,$twk3 + vxor $out4,$in5,$twk4 + + bl _aesp8_xts_dec5x + + le?vperm $out0,$out0,$out0,$leperm + vmr $twk0,$twk5 # unused tweak + vxor $twk1,$tweak,$rndkey0 + le?vperm $out1,$out1,$out1,$leperm + stvx_u $out0,$x00,$out # store output + vxor $out0,$in0,$twk1 + le?vperm $out2,$out2,$out2,$leperm + stvx_u $out1,$x10,$out + le?vperm $out3,$out3,$out3,$leperm + stvx_u $out2,$x20,$out + le?vperm $out4,$out4,$out4,$leperm + stvx_u $out3,$x30,$out + stvx_u $out4,$x40,$out + addi $out,$out,0x50 + bne Lxts_dec6x_steal + b Lxts_dec6x_done + +.align 4 +Lxts_dec6x_four: + vxor $out0,$in2,$twk0 + vxor $out1,$in3,$twk1 + vxor $out2,$in4,$twk2 + vxor $out3,$in5,$twk3 + vxor $out4,$out4,$out4 + + bl _aesp8_xts_dec5x + + le?vperm $out0,$out0,$out0,$leperm + vmr $twk0,$twk4 # unused tweak + vmr $twk1,$twk5 + le?vperm $out1,$out1,$out1,$leperm + stvx_u $out0,$x00,$out # store output + vxor $out0,$in0,$twk5 + le?vperm $out2,$out2,$out2,$leperm + stvx_u $out1,$x10,$out + le?vperm $out3,$out3,$out3,$leperm + stvx_u $out2,$x20,$out + stvx_u $out3,$x30,$out + addi $out,$out,0x40 + bne Lxts_dec6x_steal + b Lxts_dec6x_done + +.align 4 +Lxts_dec6x_three: + vxor $out0,$in3,$twk0 + vxor $out1,$in4,$twk1 + vxor $out2,$in5,$twk2 + vxor $out3,$out3,$out3 + vxor $out4,$out4,$out4 + + bl _aesp8_xts_dec5x + + le?vperm $out0,$out0,$out0,$leperm + vmr $twk0,$twk3 # unused tweak + vmr $twk1,$twk4 + le?vperm $out1,$out1,$out1,$leperm + stvx_u $out0,$x00,$out # store output + vxor $out0,$in0,$twk4 + le?vperm $out2,$out2,$out2,$leperm + stvx_u $out1,$x10,$out + stvx_u $out2,$x20,$out + addi $out,$out,0x30 + bne Lxts_dec6x_steal + b Lxts_dec6x_done + +.align 4 +Lxts_dec6x_two: + vxor $out0,$in4,$twk0 + vxor $out1,$in5,$twk1 + vxor $out2,$out2,$out2 + vxor $out3,$out3,$out3 + vxor $out4,$out4,$out4 + + bl _aesp8_xts_dec5x + + le?vperm $out0,$out0,$out0,$leperm + vmr $twk0,$twk2 # unused tweak + vmr $twk1,$twk3 + le?vperm $out1,$out1,$out1,$leperm + stvx_u $out0,$x00,$out # store output + vxor $out0,$in0,$twk3 + stvx_u $out1,$x10,$out + addi $out,$out,0x20 + bne Lxts_dec6x_steal + b Lxts_dec6x_done + +.align 4 +Lxts_dec6x_one: + vxor $out0,$in5,$twk0 + nop +Loop_xts_dec1x: + vncipher $out0,$out0,v24 + lvx v24,$x20,$key_ # round[3] + addi $key_,$key_,0x20 + + vncipher $out0,$out0,v25 + lvx v25,$x10,$key_ # round[4] + bdnz Loop_xts_dec1x + + subi r0,$taillen,1 + vncipher $out0,$out0,v24 + + andi. r0,r0,16 + cmpwi $taillen,0 + vncipher $out0,$out0,v25 + + sub $inp,$inp,r0 + vncipher $out0,$out0,v26 + + lvx_u $in0,0,$inp + vncipher $out0,$out0,v27 + + addi $key_,$sp,$FRAME+15 # rewind $key_ + vncipher $out0,$out0,v28 + lvx v24,$x00,$key_ # re-pre-load round[1] + + vncipher $out0,$out0,v29 + lvx v25,$x10,$key_ # re-pre-load round[2] + vxor $twk0,$twk0,v31 + + le?vperm $in0,$in0,$in0,$leperm + vncipher $out0,$out0,v30 + + mtctr $rounds + vncipherlast $out0,$out0,$twk0 + + vmr $twk0,$twk1 # unused tweak + vmr $twk1,$twk2 + le?vperm $out0,$out0,$out0,$leperm + stvx_u $out0,$x00,$out # store output + addi $out,$out,0x10 + vxor $out0,$in0,$twk2 + bne Lxts_dec6x_steal + b Lxts_dec6x_done + +.align 4 +Lxts_dec6x_zero: + cmpwi $taillen,0 + beq Lxts_dec6x_done + + lvx_u $in0,0,$inp + le?vperm $in0,$in0,$in0,$leperm + vxor $out0,$in0,$twk1 +Lxts_dec6x_steal: + vncipher $out0,$out0,v24 + lvx v24,$x20,$key_ # round[3] + addi $key_,$key_,0x20 + + vncipher $out0,$out0,v25 + lvx v25,$x10,$key_ # round[4] + bdnz Lxts_dec6x_steal + + add $inp,$inp,$taillen + vncipher $out0,$out0,v24 + + cmpwi $taillen,0 + vncipher $out0,$out0,v25 + + lvx_u $in0,0,$inp + vncipher $out0,$out0,v26 + + lvsr $inpperm,0,$taillen # $in5 is no more + vncipher $out0,$out0,v27 + + addi $key_,$sp,$FRAME+15 # rewind $key_ + vncipher $out0,$out0,v28 + lvx v24,$x00,$key_ # re-pre-load round[1] + + vncipher $out0,$out0,v29 + lvx v25,$x10,$key_ # re-pre-load round[2] + vxor $twk1,$twk1,v31 + + le?vperm $in0,$in0,$in0,$leperm + vncipher $out0,$out0,v30 + + vperm $in0,$in0,$in0,$inpperm + vncipherlast $tmp,$out0,$twk1 + + le?vperm $out0,$tmp,$tmp,$leperm + le?stvx_u $out0,0,$out + be?stvx_u $tmp,0,$out + + vxor $out0,$out0,$out0 + vspltisb $out1,-1 + vperm $out0,$out0,$out1,$inpperm + vsel $out0,$in0,$tmp,$out0 + vxor $out0,$out0,$twk0 + + subi r30,$out,1 + mtctr $taillen +Loop_xts_dec6x_steal: + lbzu r0,1(r30) + stb r0,16(r30) + bdnz Loop_xts_dec6x_steal + + li $taillen,0 + mtctr $rounds + b Loop_xts_dec1x # one more time... + +.align 4 +Lxts_dec6x_done: + ${UCMP}i $ivp,0 + beq Lxts_dec6x_ret + + vxor $tweak,$twk0,$rndkey0 + le?vperm $tweak,$tweak,$tweak,$leperm + stvx_u $tweak,0,$ivp + +Lxts_dec6x_ret: + mtlr r11 + li r10,`$FRAME+15` + li r11,`$FRAME+31` + stvx $seven,r10,$sp # wipe copies of round keys + addi r10,r10,32 + stvx $seven,r11,$sp + addi r11,r11,32 + stvx $seven,r10,$sp + addi r10,r10,32 + stvx $seven,r11,$sp + addi r11,r11,32 + stvx $seven,r10,$sp + addi r10,r10,32 + stvx $seven,r11,$sp + addi r11,r11,32 + stvx $seven,r10,$sp + addi r10,r10,32 + stvx $seven,r11,$sp + addi r11,r11,32 + + mtspr 256,$vrsave + lvx v20,r10,$sp # ABI says so + addi r10,r10,32 + lvx v21,r11,$sp + addi r11,r11,32 + lvx v22,r10,$sp + addi r10,r10,32 + lvx v23,r11,$sp + addi r11,r11,32 + lvx v24,r10,$sp + addi r10,r10,32 + lvx v25,r11,$sp + addi r11,r11,32 + lvx v26,r10,$sp + addi r10,r10,32 + lvx v27,r11,$sp + addi r11,r11,32 + lvx v28,r10,$sp + addi r10,r10,32 + lvx v29,r11,$sp + addi r11,r11,32 + lvx v30,r10,$sp + lvx v31,r11,$sp + $POP r26,`$FRAME+21*16+0*$SIZE_T`($sp) + $POP r27,`$FRAME+21*16+1*$SIZE_T`($sp) + $POP r28,`$FRAME+21*16+2*$SIZE_T`($sp) + $POP r29,`$FRAME+21*16+3*$SIZE_T`($sp) + $POP r30,`$FRAME+21*16+4*$SIZE_T`($sp) + $POP r31,`$FRAME+21*16+5*$SIZE_T`($sp) + addi $sp,$sp,`$FRAME+21*16+6*$SIZE_T` + blr + .long 0 + .byte 0,12,0x04,1,0x80,6,6,0 + .long 0 + +.align 5 +_aesp8_xts_dec5x: + vncipher $out0,$out0,v24 + vncipher $out1,$out1,v24 + vncipher $out2,$out2,v24 + vncipher $out3,$out3,v24 + vncipher $out4,$out4,v24 + lvx v24,$x20,$key_ # round[3] + addi $key_,$key_,0x20 + + vncipher $out0,$out0,v25 + vncipher $out1,$out1,v25 + vncipher $out2,$out2,v25 + vncipher $out3,$out3,v25 + vncipher $out4,$out4,v25 + lvx v25,$x10,$key_ # round[4] + bdnz _aesp8_xts_dec5x + + subi r0,$taillen,1 + vncipher $out0,$out0,v24 + vncipher $out1,$out1,v24 + vncipher $out2,$out2,v24 + vncipher $out3,$out3,v24 + vncipher $out4,$out4,v24 + + andi. r0,r0,16 + cmpwi $taillen,0 + vncipher $out0,$out0,v25 + vncipher $out1,$out1,v25 + vncipher $out2,$out2,v25 + vncipher $out3,$out3,v25 + vncipher $out4,$out4,v25 + vxor $twk0,$twk0,v31 + + sub $inp,$inp,r0 + vncipher $out0,$out0,v26 + vncipher $out1,$out1,v26 + vncipher $out2,$out2,v26 + vncipher $out3,$out3,v26 + vncipher $out4,$out4,v26 + vxor $in1,$twk1,v31 + + vncipher $out0,$out0,v27 + lvx_u $in0,0,$inp + vncipher $out1,$out1,v27 + vncipher $out2,$out2,v27 + vncipher $out3,$out3,v27 + vncipher $out4,$out4,v27 + vxor $in2,$twk2,v31 + + addi $key_,$sp,$FRAME+15 # rewind $key_ + vncipher $out0,$out0,v28 + vncipher $out1,$out1,v28 + vncipher $out2,$out2,v28 + vncipher $out3,$out3,v28 + vncipher $out4,$out4,v28 + lvx v24,$x00,$key_ # re-pre-load round[1] + vxor $in3,$twk3,v31 + + vncipher $out0,$out0,v29 + le?vperm $in0,$in0,$in0,$leperm + vncipher $out1,$out1,v29 + vncipher $out2,$out2,v29 + vncipher $out3,$out3,v29 + vncipher $out4,$out4,v29 + lvx v25,$x10,$key_ # re-pre-load round[2] + vxor $in4,$twk4,v31 + + vncipher $out0,$out0,v30 + vncipher $out1,$out1,v30 + vncipher $out2,$out2,v30 + vncipher $out3,$out3,v30 + vncipher $out4,$out4,v30 + + vncipherlast $out0,$out0,$twk0 + vncipherlast $out1,$out1,$in1 + vncipherlast $out2,$out2,$in2 + vncipherlast $out3,$out3,$in3 + vncipherlast $out4,$out4,$in4 + mtctr $rounds + blr + .long 0 + .byte 0,12,0x14,0,0,0,0,0 +___ +}} }}} + my $consts=1; foreach(split("\n",$code)) { s/\`([^\`]*)\`/eval($1)/geo; @@ -1898,7 +3759,7 @@ foreach(split("\n",$code)) { if ($flavour =~ /le$/o) { SWITCH: for($conv) { /\?inv/ && do { @bytes=map($_^0xf,@bytes); last; }; - /\?rev/ && do { @bytes=reverse(@bytes); last; }; + /\?rev/ && do { @bytes=reverse(@bytes); last; }; } } -- cgit v1.2.3 From c07f5d3da643329f38ff7c2ef2252723453dd9c4 Mon Sep 17 00:00:00 2001 From: "Leonidas S. Barbosa" Date: Mon, 18 Jul 2016 12:26:26 -0300 Subject: crypto: vmx - Adding support for XTS This patch add XTS support using VMX-crypto driver. Signed-off-by: Leonidas S. Barbosa Signed-off-by: Paulo Flabiano Smorigo Signed-off-by: Herbert Xu --- drivers/crypto/vmx/Makefile | 2 +- drivers/crypto/vmx/aes_xts.c | 190 +++++++++++++++++++++++++++++++++++++++++++ drivers/crypto/vmx/vmx.c | 2 + 3 files changed, 193 insertions(+), 1 deletion(-) create mode 100644 drivers/crypto/vmx/aes_xts.c (limited to 'drivers/crypto') diff --git a/drivers/crypto/vmx/Makefile b/drivers/crypto/vmx/Makefile index d28ab96a2475..de6e241b0866 100644 --- a/drivers/crypto/vmx/Makefile +++ b/drivers/crypto/vmx/Makefile @@ -1,5 +1,5 @@ obj-$(CONFIG_CRYPTO_DEV_VMX_ENCRYPT) += vmx-crypto.o -vmx-crypto-objs := vmx.o aesp8-ppc.o ghashp8-ppc.o aes.o aes_cbc.o aes_ctr.o ghash.o +vmx-crypto-objs := vmx.o aesp8-ppc.o ghashp8-ppc.o aes.o aes_cbc.o aes_ctr.o aes_xts.o ghash.o ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y) TARGET := linux-ppc64le diff --git a/drivers/crypto/vmx/aes_xts.c b/drivers/crypto/vmx/aes_xts.c new file mode 100644 index 000000000000..cfb25413917c --- /dev/null +++ b/drivers/crypto/vmx/aes_xts.c @@ -0,0 +1,190 @@ +/** + * AES XTS routines supporting VMX In-core instructions on Power 8 + * + * Copyright (C) 2015 International Business Machines Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundations; version 2 only. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY of FITNESS FOR A PARTICUPAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * + * Author: Leonidas S. Barbosa + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "aesp8-ppc.h" + +struct p8_aes_xts_ctx { + struct crypto_blkcipher *fallback; + struct aes_key enc_key; + struct aes_key dec_key; + struct aes_key tweak_key; +}; + +static int p8_aes_xts_init(struct crypto_tfm *tfm) +{ + const char *alg; + struct crypto_blkcipher *fallback; + struct p8_aes_xts_ctx *ctx = crypto_tfm_ctx(tfm); + + if (!(alg = crypto_tfm_alg_name(tfm))) { + printk(KERN_ERR "Failed to get algorithm name.\n"); + return -ENOENT; + } + + fallback = + crypto_alloc_blkcipher(alg, 0, CRYPTO_ALG_NEED_FALLBACK); + if (IS_ERR(fallback)) { + printk(KERN_ERR + "Failed to allocate transformation for '%s': %ld\n", + alg, PTR_ERR(fallback)); + return PTR_ERR(fallback); + } + printk(KERN_INFO "Using '%s' as fallback implementation.\n", + crypto_tfm_alg_driver_name((struct crypto_tfm *) fallback)); + + crypto_blkcipher_set_flags( + fallback, + crypto_blkcipher_get_flags((struct crypto_blkcipher *)tfm)); + ctx->fallback = fallback; + + return 0; +} + +static void p8_aes_xts_exit(struct crypto_tfm *tfm) +{ + struct p8_aes_xts_ctx *ctx = crypto_tfm_ctx(tfm); + + if (ctx->fallback) { + crypto_free_blkcipher(ctx->fallback); + ctx->fallback = NULL; + } +} + +static int p8_aes_xts_setkey(struct crypto_tfm *tfm, const u8 *key, + unsigned int keylen) +{ + int ret; + struct p8_aes_xts_ctx *ctx = crypto_tfm_ctx(tfm); + + ret = xts_check_key(tfm, key, keylen); + if (ret) + return ret; + + preempt_disable(); + pagefault_disable(); + enable_kernel_vsx(); + ret = aes_p8_set_encrypt_key(key + keylen/2, (keylen/2) * 8, &ctx->tweak_key); + ret += aes_p8_set_encrypt_key(key, (keylen/2) * 8, &ctx->enc_key); + ret += aes_p8_set_decrypt_key(key, (keylen/2) * 8, &ctx->dec_key); + disable_kernel_vsx(); + pagefault_enable(); + preempt_enable(); + + ret += crypto_blkcipher_setkey(ctx->fallback, key, keylen); + return ret; +} + +static int p8_aes_xts_crypt(struct blkcipher_desc *desc, + struct scatterlist *dst, + struct scatterlist *src, + unsigned int nbytes, int enc) +{ + int ret; + u8 tweak[AES_BLOCK_SIZE]; + u8 *iv; + struct blkcipher_walk walk; + struct p8_aes_xts_ctx *ctx = + crypto_tfm_ctx(crypto_blkcipher_tfm(desc->tfm)); + struct blkcipher_desc fallback_desc = { + .tfm = ctx->fallback, + .info = desc->info, + .flags = desc->flags + }; + + if (in_interrupt()) { + ret = enc ? crypto_blkcipher_encrypt(&fallback_desc, dst, src, nbytes) : + crypto_blkcipher_decrypt(&fallback_desc, dst, src, nbytes); + } else { + preempt_disable(); + pagefault_disable(); + enable_kernel_vsx(); + + blkcipher_walk_init(&walk, dst, src, nbytes); + + iv = (u8 *)walk.iv; + ret = blkcipher_walk_virt(desc, &walk); + memset(tweak, 0, AES_BLOCK_SIZE); + aes_p8_encrypt(iv, tweak, &ctx->tweak_key); + + while ((nbytes = walk.nbytes)) { + if (enc) + aes_p8_xts_encrypt(walk.src.virt.addr, walk.dst.virt.addr, + nbytes & AES_BLOCK_MASK, &ctx->enc_key, NULL, tweak); + else + aes_p8_xts_decrypt(walk.src.virt.addr, walk.dst.virt.addr, + nbytes & AES_BLOCK_MASK, &ctx->dec_key, NULL, tweak); + + nbytes &= AES_BLOCK_SIZE - 1; + ret = blkcipher_walk_done(desc, &walk, nbytes); + } + + disable_kernel_vsx(); + pagefault_enable(); + preempt_enable(); + } + return ret; +} + +static int p8_aes_xts_encrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + return p8_aes_xts_crypt(desc, dst, src, nbytes, 1); +} + +static int p8_aes_xts_decrypt(struct blkcipher_desc *desc, + struct scatterlist *dst, + struct scatterlist *src, unsigned int nbytes) +{ + return p8_aes_xts_crypt(desc, dst, src, nbytes, 0); +} + +struct crypto_alg p8_aes_xts_alg = { + .cra_name = "xts(aes)", + .cra_driver_name = "p8_aes_xts", + .cra_module = THIS_MODULE, + .cra_priority = 2000, + .cra_type = &crypto_blkcipher_type, + .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER | CRYPTO_ALG_NEED_FALLBACK, + .cra_alignmask = 0, + .cra_blocksize = AES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct p8_aes_xts_ctx), + .cra_init = p8_aes_xts_init, + .cra_exit = p8_aes_xts_exit, + .cra_blkcipher = { + .ivsize = AES_BLOCK_SIZE, + .min_keysize = 2 * AES_MIN_KEY_SIZE, + .max_keysize = 2 * AES_MAX_KEY_SIZE, + .setkey = p8_aes_xts_setkey, + .encrypt = p8_aes_xts_encrypt, + .decrypt = p8_aes_xts_decrypt, + } +}; diff --git a/drivers/crypto/vmx/vmx.c b/drivers/crypto/vmx/vmx.c index e163d5770438..f688c32fbcc7 100644 --- a/drivers/crypto/vmx/vmx.c +++ b/drivers/crypto/vmx/vmx.c @@ -31,10 +31,12 @@ extern struct shash_alg p8_ghash_alg; extern struct crypto_alg p8_aes_alg; extern struct crypto_alg p8_aes_cbc_alg; extern struct crypto_alg p8_aes_ctr_alg; +extern struct crypto_alg p8_aes_xts_alg; static struct crypto_alg *algs[] = { &p8_aes_alg, &p8_aes_cbc_alg, &p8_aes_ctr_alg, + &p8_aes_xts_alg, NULL, }; -- cgit v1.2.3 From 48a9343216f9c8ff3066cd12c90d161d1d9536d8 Mon Sep 17 00:00:00 2001 From: Paulo Flabiano Smorigo Date: Tue, 19 Jul 2016 10:36:26 -0300 Subject: crypto: vmx - Ignore generated files Ignore assembly files generated by the perl script. Signed-off-by: Paulo Flabiano Smorigo Signed-off-by: Herbert Xu --- drivers/crypto/vmx/.gitignore | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 drivers/crypto/vmx/.gitignore (limited to 'drivers/crypto') diff --git a/drivers/crypto/vmx/.gitignore b/drivers/crypto/vmx/.gitignore new file mode 100644 index 000000000000..af4a7ce4738d --- /dev/null +++ b/drivers/crypto/vmx/.gitignore @@ -0,0 +1,2 @@ +aesp8-ppc.S +ghashp8-ppc.S -- cgit v1.2.3 From 16dee78005be9908197c707393a3b6a61a14b4fb Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 20 Jul 2016 22:32:50 +0800 Subject: crypto: vmx - Fix aes_p8_xts_decrypt build failure We use _GLOBAL so there is no need to do the manual alignment, in fact it causes a build failure. Reported-by: Stephen Rothwell Signed-off-by: Herbert Xu --- drivers/crypto/vmx/aesp8-ppc.pl | 2 -- 1 file changed, 2 deletions(-) (limited to 'drivers/crypto') diff --git a/drivers/crypto/vmx/aesp8-ppc.pl b/drivers/crypto/vmx/aesp8-ppc.pl index 813ffcc4d17c..0b4a293b8a1e 100644 --- a/drivers/crypto/vmx/aesp8-ppc.pl +++ b/drivers/crypto/vmx/aesp8-ppc.pl @@ -2125,8 +2125,6 @@ Lxts_enc_ret: .size .${prefix}_xts_encrypt,.-.${prefix}_xts_encrypt .globl .${prefix}_xts_decrypt -.align 5 -.${prefix}_xts_decrypt: mr $inp,r3 # reassign li r3,-1 ${UCMP}i $len,16 -- cgit v1.2.3