|
| 1 | +/* |
| 2 | + * Copyright (C) 2024 Intel Corporation. All rights reserved. |
| 3 | + * Authors: |
| 4 | + * Gustavo A Espinoza <gustavo.adolfo.espinoza.quintero@intel.com> |
| 5 | + * <gustavoaespinozaq@hotmail.com> |
| 6 | + * |
| 7 | + * For conditions of distribution and use, see copyright notice in zlib.h |
| 8 | + */ |
#include "deflate_qat.h"
#include "deflate.h"

#include "session.hpp"
#include "qat_instance.hpp"
#include "qat_buffer_list.hpp"
#include "qat.hpp"

#include <cstring>
#include <memory>
| 18 | + |
/*
 * Capacity, in bytes, of the input staging buffer (1 MiB).
 *
 * TODO(gustavoa): Make the input size adjustable from the memlevel
 * attribute on deflateInit.
 */
static constexpr size_t kInputSize = static_cast<size_t>(1) << 20;
| 24 | + |
| 25 | +/* QAT Instances obtained available from the library. */ |
| 26 | +static std::vector<std::shared_ptr<qat::Instance>> qat_instances; |
| 27 | + |
| 28 | +/* |
| 29 | +* TODO(gustavoa): Verify if the ordering of the struct fields won't create |
| 30 | +* unnecessary holes in the structure that requires extraneous padding. |
| 31 | +*/ |
| 32 | +struct qat_deflate { |
| 33 | + std::unique_ptr<qat::DeflateSession> qat_session; |
| 34 | + |
| 35 | + /* QAT requires contiguous physical pages. Cannot be allocated using |
| 36 | + * malloc/new. |
| 37 | + */ |
| 38 | + uint8_t *input_buffer; |
| 39 | + uint8_t *output_buffer; |
| 40 | + |
| 41 | + /* Pointer to the next byte in the output buffer. */ |
| 42 | + uint8_t *pending_out; |
| 43 | + |
| 44 | + unsigned input_buffer_size; |
| 45 | + unsigned output_buffer_size; |
| 46 | + |
| 47 | + unsigned pending_in_count; |
| 48 | + unsigned pending_out_count; |
| 49 | +}; |
| 50 | + |
| 51 | +static std::unique_ptr<qat::DeflateSession> qat_create_session(int level, int wrap) |
| 52 | +{ |
| 53 | + CpaDcChecksum checksum = CPA_DC_NONE; |
| 54 | + |
| 55 | + switch(wrap) { |
| 56 | + case 1: |
| 57 | + checksum = CPA_DC_ADLER32; |
| 58 | + break; |
| 59 | + case 2: |
| 60 | + checksum = CPA_DC_CRC32; |
| 61 | + break; |
| 62 | + } |
| 63 | + |
| 64 | + return std::make_unique<qat::DeflateSession>( |
| 65 | + qat_instances[0], |
| 66 | + (CpaDcCompLvl)level, |
| 67 | + checksum, |
| 68 | + 0 |
| 69 | + ); |
| 70 | +} |
| 71 | + |
| 72 | + |
| 73 | +int qat_deflate_init() |
| 74 | +{ |
| 75 | + return (qat::Initialize()) ? Z_ERRNO : Z_OK; |
| 76 | +} |
| 77 | + |
| 78 | +struct qat_deflate* qat_deflate_state_init(int level, int wrap) |
| 79 | +{ |
| 80 | + if (qat_instances.empty()) { |
| 81 | + qat_instances = qat::Instance::Create(); |
| 82 | + } |
| 83 | + if (qat_instances.empty()) { |
| 84 | + return nullptr; |
| 85 | + } |
| 86 | + |
| 87 | + struct qat_deflate *qat_deflate = new struct qat_deflate; |
| 88 | + if (!qat_deflate) { |
| 89 | + return nullptr; |
| 90 | + } |
| 91 | + |
| 92 | + /* TODO(gustavoa): Find a way to utilize all the available instances for the same |
| 93 | + * process. |
| 94 | + */ |
| 95 | + qat_instances[0]->Start(); |
| 96 | + |
| 97 | + qat_deflate->qat_session = qat_create_session(level, wrap); |
| 98 | + |
| 99 | + qat_deflate->input_buffer_size = kInputSize; |
| 100 | + qat_deflate->input_buffer = qat::AllocBlockArray<uint8_t>(kInputSize, 0); |
| 101 | + qat_deflate->output_buffer_size = |
| 102 | + qat_deflate->qat_session->GetDeflateBound(qat_deflate->input_buffer_size); |
| 103 | + qat_deflate->pending_out = qat_deflate->output_buffer = |
| 104 | + qat::AllocBlockArray<uint8_t>(qat_deflate->output_buffer_size, 0); |
| 105 | + |
| 106 | + qat_deflate->pending_in_count = qat_deflate->pending_out_count = 0; |
| 107 | + |
| 108 | + if (!qat_deflate->input_buffer || !qat_deflate->output_buffer) { |
| 109 | + return nullptr; |
| 110 | + } |
| 111 | + |
| 112 | + return qat_deflate; |
| 113 | +} |
| 114 | + |
| 115 | +static unsigned qat_read_buf(z_streamp strm, struct qat_deflate* qat, unsigned size) |
| 116 | +{ |
| 117 | + unsigned len = strm->avail_in; |
| 118 | + |
| 119 | + if (len > size) { |
| 120 | + len = size; |
| 121 | + } |
| 122 | + if (len == 0) return 0; |
| 123 | + |
| 124 | + strm->avail_in -= len; |
| 125 | + strm->total_in += len; |
| 126 | + |
| 127 | + zmemcpy( |
| 128 | + qat->input_buffer + qat->pending_in_count, |
| 129 | + strm->next_in, |
| 130 | + len |
| 131 | + ); |
| 132 | + |
| 133 | + strm->next_in += len; |
| 134 | + qat->pending_in_count += len; |
| 135 | + |
| 136 | + return len; |
| 137 | +} |
| 138 | + |
| 139 | +void qat_flush_pending(deflate_state* s) |
| 140 | +{ |
| 141 | + unsigned len; |
| 142 | + z_streamp strm = s->strm; |
| 143 | + struct qat_deflate* qat = s->qat_s; |
| 144 | + |
| 145 | + len = qat->pending_out_count; |
| 146 | + if (len > strm->avail_out) len = strm->avail_out; |
| 147 | + if (len == 0) return; |
| 148 | + |
| 149 | + zmemcpy(strm->next_out, qat->pending_out, len); |
| 150 | + |
| 151 | + qat->pending_out += len; |
| 152 | + qat->pending_out_count -= len; |
| 153 | + strm->next_out += len; |
| 154 | + strm->avail_out -= len; |
| 155 | + strm->total_out += len; |
| 156 | + if (qat->pending_out_count == 0) { |
| 157 | + qat->pending_out = qat->output_buffer; |
| 158 | + } |
| 159 | +} |
| 160 | + |
| 161 | +static int qat_compress_pending(deflate_state*s, int flush) |
| 162 | +{ |
| 163 | + struct qat_deflate* qat = s->qat_s; |
| 164 | + uint32_t metadata_size; |
| 165 | + |
| 166 | + /* TODO(gustavoa): find a way to make qatzpp setup this number internally. */ |
| 167 | + cpaDcBufferListGetMetaSize(qat->qat_session->getInstance()->GetHandle(), 1, &metadata_size); |
| 168 | + |
| 169 | + auto job = qat->qat_session->Deflate( |
| 170 | + std::make_unique<qat::IOBuffers>( |
| 171 | + std::make_unique<qat::BufferListUser>( |
| 172 | + qat->input_buffer, |
| 173 | + qat->pending_in_count, |
| 174 | + metadata_size |
| 175 | + ), |
| 176 | + std::make_unique<qat::BufferListUser>( |
| 177 | + qat->output_buffer, |
| 178 | + qat->output_buffer_size, |
| 179 | + metadata_size |
| 180 | + ) |
| 181 | + ), (flush == Z_FINISH && s->strm->avail_in == 0) |
| 182 | + ); |
| 183 | + |
| 184 | + job->WaitCompletion(); |
| 185 | + |
| 186 | + /* |
| 187 | + * TODO(gustavoa): make QAT perform the checksum combine. |
| 188 | + */ |
| 189 | + if (s->wrap == 2) { |
| 190 | + s->strm->adler = crc32_combine( |
| 191 | + s->strm->adler, |
| 192 | + job->GetResults()->checksum, |
| 193 | + job->GetResults()->consumed |
| 194 | + ); |
| 195 | + } else if (s->wrap == 1) { |
| 196 | + s->strm->adler = adler32( |
| 197 | + s->strm->adler, |
| 198 | + qat->input_buffer, |
| 199 | + job->GetResults()->consumed |
| 200 | + ); |
| 201 | + } |
| 202 | + |
| 203 | + qat->pending_out_count = job->GetResults()->produced; |
| 204 | + qat->pending_in_count -= job->GetResults()->consumed; |
| 205 | + |
| 206 | + if(qat->pending_in_count != 0) { |
| 207 | + /* Copy any remaining bytes to the beginning of the buffer. */ |
| 208 | + zmemcpy( |
| 209 | + qat->input_buffer, |
| 210 | + qat->input_buffer + job->GetResults()->consumed, |
| 211 | + qat->pending_in_count |
| 212 | + ); |
| 213 | + } |
| 214 | + |
| 215 | + return 0; |
| 216 | +} |
| 217 | + |
| 218 | +qat_block_state qat_deflate_step(deflate_state* s, int flush) |
| 219 | +{ |
| 220 | + z_streamp strm = s->strm; |
| 221 | + struct qat_deflate* qat_state = s->qat_s; |
| 222 | + |
| 223 | + for (;;) { |
| 224 | + if (qat_state->pending_in_count < qat_state->input_buffer_size) { |
| 225 | + qat_read_buf( |
| 226 | + strm, |
| 227 | + qat_state, |
| 228 | + qat_state->input_buffer_size - qat_state->pending_in_count |
| 229 | + ); |
| 230 | + if (qat_state->pending_in_count < qat_state->input_buffer_size && flush == Z_NO_FLUSH) { |
| 231 | + return qat_block_need_more; |
| 232 | + } else { |
| 233 | + qat_compress_pending(s, flush); |
| 234 | + } |
| 235 | + if (strm->avail_in == 0) { |
| 236 | + break; |
| 237 | + } |
| 238 | + } else { |
| 239 | + qat_compress_pending(s, flush); |
| 240 | + } |
| 241 | + |
| 242 | + qat_flush_pending(s); |
| 243 | + if (strm->avail_out == 0) { |
| 244 | + return (flush == Z_FINISH) ? qat_block_finish_started : qat_block_need_more; |
| 245 | + } |
| 246 | + } |
| 247 | + |
| 248 | + if (flush == Z_FINISH) { |
| 249 | + qat_flush_pending(s); |
| 250 | + if (strm->avail_out == 0) { |
| 251 | + return qat_block_finish_started; |
| 252 | + } else { |
| 253 | + return qat_block_finish_done; |
| 254 | + } |
| 255 | + } |
| 256 | + |
| 257 | + qat_flush_pending(s); |
| 258 | + if (strm->avail_out == 0) { |
| 259 | + return qat_block_done; |
| 260 | + } |
| 261 | + |
| 262 | + return qat_block_need_more; |
| 263 | +} |
| 264 | + |
| 265 | +int qat_deflate_state_free(deflate_state* s) |
| 266 | +{ |
| 267 | + struct qat_deflate* qat_state = s->qat_s; |
| 268 | + if (qat_state->input_buffer) { |
| 269 | + qat::Free(qat_state->input_buffer); |
| 270 | + } |
| 271 | + if (qat_state->output_buffer) { |
| 272 | + qat::Free(qat_state->output_buffer); |
| 273 | + } |
| 274 | + |
| 275 | + qat_state->qat_session.reset(); |
| 276 | + delete qat_state; |
| 277 | + s->qat_s = nullptr; |
| 278 | + |
| 279 | + return Z_OK; |
| 280 | +} |
| 281 | + |
| 282 | +struct qat_deflate *qat_deflate_copy(deflate_state *ss) |
| 283 | +{ |
| 284 | + struct qat_deflate *sqat = ss->qat_s; |
| 285 | + struct qat_deflate *dqat = nullptr; |
| 286 | + |
| 287 | + if (!sqat) { |
| 288 | + return nullptr; |
| 289 | + } |
| 290 | + |
| 291 | + dqat = new struct qat_deflate; |
| 292 | + |
| 293 | + dqat->qat_session = qat_create_session(ss->level, ss->wrap); |
| 294 | + |
| 295 | + dqat->input_buffer_size = sqat->input_buffer_size; |
| 296 | + dqat->input_buffer = qat::AllocBlockArray<uint8_t>(dqat->input_buffer_size, 0); |
| 297 | + |
| 298 | + dqat->output_buffer_size = sqat->output_buffer_size; |
| 299 | + dqat->output_buffer = qat::AllocBlockArray<uint8_t>(dqat->output_buffer_size, 0); |
| 300 | + |
| 301 | + dqat->pending_in_count = sqat->pending_in_count; |
| 302 | + dqat->pending_out_count = sqat->pending_out_count; |
| 303 | + |
| 304 | + dqat->pending_out = |
| 305 | + dqat->output_buffer + (sqat->pending_out - sqat->output_buffer); |
| 306 | + |
| 307 | + zmemcpy(dqat->input_buffer, sqat->input_buffer, dqat->input_buffer_size); |
| 308 | + zmemcpy(dqat->output_buffer, sqat->output_buffer, dqat->output_buffer_size); |
| 309 | + |
| 310 | + return dqat; |
| 311 | +} |
| 312 | + |
0 commit comments