+job_id_get(uint32_t qid, size_t qp_size, size_t index) {
+ return qid * qp_size + (index & (qp_size - 1));
+}
+
+#ifdef HAVE_MLX5_UMR_IMKEY
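+/* Check whether the job's KLM array can hold 'new' more entries. */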
+static inline int
+mkey_klm_available(struct mlx5_klm *klm, uint32_t pos, uint32_t new)
+{
+ return (klm && ((pos + new) <= MLX5_REGEX_MAX_KLM_NUM));
+}
+
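+/*
+ * Fill in the UMR WQE at umr_index: the control segment, the UMR
+ * control segment, the mkey context and an inline KLM pointing at
+ * the job's indirect KLM array, then fence the following RegEx WQE.
+ */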
+static inline void
+complete_umr_wqe(struct mlx5_regex_qp *qp, struct mlx5_regex_hw_qp *qp_obj,
+ struct mlx5_regex_job *mkey_job,
+ size_t umr_index, uint32_t klm_size, uint32_t total_len)
+{
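+ /* Each WQE set spans 4 WQEBBs: UMR/nop (3 WQEBBs) + RegEx WQE. */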
+ size_t wqe_offset = (umr_index & (qp_size_get(qp_obj) - 1)) *
+ (MLX5_SEND_WQE_BB * 4);
+ struct mlx5_wqe_ctrl_seg *wqe = (struct mlx5_wqe_ctrl_seg *)((uint8_t *)
+ (uintptr_t)qp_obj->qp_obj.wqes + wqe_offset);
+ struct mlx5_wqe_umr_ctrl_seg *ucseg =
+ (struct mlx5_wqe_umr_ctrl_seg *)(wqe + 1);
+ struct mlx5_wqe_mkey_context_seg *mkc =
+ (struct mlx5_wqe_mkey_context_seg *)(ucseg + 1);
+ struct mlx5_klm *iklm = (struct mlx5_klm *)(mkc + 1);
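+ /* klm_octowords must be a multiple of 4; surplus KLMs are zeroed below. */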
+ uint16_t klm_align = RTE_ALIGN(klm_size, 4);
+
+ memset(wqe, 0, MLX5_REGEX_UMR_WQE_SIZE);
+ /* Set WQE control seg. Non-inline KLM UMR WQE size must be 9 WQE_DS. */
+ set_wqe_ctrl_seg(wqe, (umr_index * 4), MLX5_OPCODE_UMR,
+ 0, qp_obj->qp_obj.qp->id, 0, 9, 0,
+ rte_cpu_to_be_32(mkey_job->imkey->id));
+ /* Set UMR WQE control seg. */
+ ucseg->mkey_mask |= rte_cpu_to_be_64(MLX5_WQE_UMR_CTRL_MKEY_MASK_LEN |
+ MLX5_WQE_UMR_CTRL_FLAG_TRNSLATION_OFFSET |
+ MLX5_WQE_UMR_CTRL_MKEY_MASK_ACCESS_LOCAL_WRITE);
+ ucseg->klm_octowords = rte_cpu_to_be_16(klm_align);
+ /* Set mkey context seg. */
+ mkc->len = rte_cpu_to_be_64(total_len);
+ mkc->qpn_mkey = rte_cpu_to_be_32(0xffffff00 |
+ (mkey_job->imkey->id & 0xff));
+ /* Point the UMR's inline KLM at the indirect KLM array. */
+ iklm->address = rte_cpu_to_be_64((uintptr_t)mkey_job->imkey_array);
+ iklm->mkey = rte_cpu_to_be_32(qp->imkey_addr->lkey);
+ iklm->byte_count = rte_cpu_to_be_32(klm_align);
+ /* Clear the padding memory. */
+ memset((uint8_t *)&mkey_job->imkey_array[klm_size], 0,
+ sizeof(struct mlx5_klm) * (klm_align - klm_size));
+
+ /* Fence the following RegEx WQE so it starts after the UMR completes. */
+ wqe = (struct mlx5_wqe_ctrl_seg *)
+ (((uint8_t *)wqe) + MLX5_REGEX_UMR_WQE_SIZE);
+ wqe->fm_ce_se |= MLX5_WQE_CTRL_INITIATOR_SMALL_FENCE;
+}
+
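+/*
+ * Build the nop + RegEx WQE set for one op. The nop keeps the set
+ * aligned and may be overwritten later by a UMR WQE when the burst
+ * carries scattered mbufs.
+ */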
+static inline void
+prep_nop_regex_wqe_set(struct mlx5_regex_priv *priv,
+ struct mlx5_regex_hw_qp *qp, struct rte_regex_ops *op,
+ struct mlx5_regex_job *job, size_t pi, struct mlx5_klm *klm)
+{
+ size_t wqe_offset = (pi & (qp_size_get(qp) - 1)) *
+ (MLX5_SEND_WQE_BB << 2);
+ struct mlx5_wqe_ctrl_seg *wqe = (struct mlx5_wqe_ctrl_seg *)((uint8_t *)
+ (uintptr_t)qp->qp_obj.wqes + wqe_offset);
+
+ /* Clear the WQE memory if it was previously used as a UMR WQE. */
+ if ((rte_be_to_cpu_32(wqe->opmod_idx_opcode) & 0xff) != MLX5_OPCODE_NOP)
+ memset(wqe, 0, MLX5_REGEX_UMR_WQE_SIZE);
+ /* The UMR WQE is 9 DS; align the nop WQE to 3 WQEBBs (12 DS). */
+ set_wqe_ctrl_seg(wqe, pi * 4, MLX5_OPCODE_NOP, 0, qp->qp_obj.qp->id,
+ 0, 12, 0, 0);
+ __prep_one(priv, qp, op, job, pi, klm);
+}
+
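+/*
+ * Build the WQE sets for a burst of ops. Segments of multi-segment
+ * mbufs are gathered into an indirect mkey's KLM array; the mkey is
+ * registered by a UMR WQE written over the nop slot of the first WQE
+ * set that uses it.
+ */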
+static inline void
+prep_regex_umr_wqe_set(struct mlx5_regex_priv *priv, struct mlx5_regex_qp *qp,
+ struct mlx5_regex_hw_qp *qp_obj, struct rte_regex_ops **op,
+ size_t nb_ops)
+{
+ struct mlx5_regex_job *job = NULL;
+ size_t hw_qpid = qp_obj->qpn, mkey_job_id = 0;
+ size_t left_ops = nb_ops;
+ uint32_t klm_num = 0;
+ uint32_t len = 0;
+ struct mlx5_klm *mkey_klm = NULL;
+ struct mlx5_klm klm;
+ uintptr_t addr;
+
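+ /* Prefetch all ops up front to hide cache misses during the build. */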
+ while (left_ops--)
+ rte_prefetch0(op[left_ops]);
+ left_ops = nb_ops;
+ /*
+ * Build the WQE set in reverse order. When the burst consumes
+ * multiple mkeys, building in order makes it hard to address the
+ * last mkey index, since the last RegEx WQE's index is only known
+ * once the build finishes.
+ */
+ while (left_ops--) {
+ struct rte_mbuf *mbuf = op[left_ops]->mbuf;
+ size_t pi = MLX5_REGEX_UMR_QP_PI_IDX(qp_obj->pi, left_ops);
+
+ if (mbuf->nb_segs > 1) {
+ size_t scatter_size = 0;
+
+ if (!mkey_klm_available(mkey_klm, klm_num,
+ mbuf->nb_segs)) {
+ /*
+ * The mkey's KLM array is full. Create the
+ * UMR WQE in the next WQE set.
+ */
+ if (mkey_klm)
+ complete_umr_wqe(qp, qp_obj,
+ &qp->jobs[mkey_job_id],
+ MLX5_REGEX_UMR_QP_PI_IDX(pi, 1),
+ klm_num, len);
+ /*
+ * Get the indirect mkey and KLM array index
+ * from the last WQE set.
+ */
+ mkey_job_id = job_id_get(hw_qpid,
+ qp_size_get(qp_obj), pi);
+ mkey_klm = qp->jobs[mkey_job_id].imkey_array;
+ klm_num = 0;
+ len = 0;
+ }
+ /* Build RegEx WQE's data segment KLM. */
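+ /* The address is this op's byte offset within the indirect mkey. */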
+ klm.address = len;
+ klm.mkey = rte_cpu_to_be_32
+ (qp->jobs[mkey_job_id].imkey->id);
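+ /* Add one KLM to the indirect mkey per segment of the chain. */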
+ while (mbuf) {
+ addr = rte_pktmbuf_mtod(mbuf, uintptr_t);
+ /* Build indirect mkey seg's KLM. */
+ mkey_klm->mkey = mlx5_regex_mb2mr(priv,
+ &qp->mr_ctrl,
+ mbuf);
+ mkey_klm->address = rte_cpu_to_be_64(addr);
+ mkey_klm->byte_count = rte_cpu_to_be_32
+ (rte_pktmbuf_data_len(mbuf));
+ /*
+ * Accumulate the chain's total size for the
+ * RegEx data segment.
+ */
+ scatter_size += rte_pktmbuf_data_len(mbuf);
+ mkey_klm++;
+ klm_num++;
+ mbuf = mbuf->next;
+ }
+ len += scatter_size;
+ klm.byte_count = scatter_size;
+ } else {
+ /* The single mbuf case. Build the KLM directly. */
+ klm.mkey = mlx5_regex_mb2mr(priv, &qp->mr_ctrl, mbuf);
+ klm.address = rte_pktmbuf_mtod(mbuf, uintptr_t);
+ klm.byte_count = rte_pktmbuf_data_len(mbuf);
+ }
+ job = &qp->jobs[job_id_get(hw_qpid, qp_size_get(qp_obj), pi)];
+ /*
+ * Build the nop + RegEx WQE set by default. The first nop WQE
+ * will be updated later to a UMR WQE if scattered mbufs exist.
+ */
+ prep_nop_regex_wqe_set(priv, qp_obj, op[left_ops], job, pi,
+ &klm);
+ }
+ /*
+ * Scattered mbufs have been added to the KLM array. Complete the
+ * UMR WQE build and update the first nop WQE to a UMR WQE.
+ */
+ if (mkey_klm)
+ complete_umr_wqe(qp, qp_obj, &qp->jobs[mkey_job_id], qp_obj->pi,
+ klm_num, len);
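+ /* Record the last WQE index for the doorbell and advance the PI. */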
+ qp_obj->db_pi = MLX5_REGEX_UMR_QP_PI_IDX(qp_obj->pi, nb_ops - 1);
+ qp_obj->pi = MLX5_REGEX_UMR_QP_PI_IDX(qp_obj->pi, nb_ops);