/*
 * io_uring + O_DIRECT 对比小 benchmark
 * 文章：post/storage/79-o-direct-io-uring/
 */
#define _GNU_SOURCE
#include <fcntl.h>
#include <liburing.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>

#ifndef O_DIRECT
#define O_DIRECT 040000
#endif

#define BS 4096
#define OPS 10000

/*
 * Read CLOCK_MONOTONIC and return it as seconds in a double
 * (whole seconds plus the nanosecond part scaled by 1e-9).
 * The result of clock_gettime() is not checked.
 */
static double now_sec(void) {
    struct timespec now;
    clock_gettime(CLOCK_MONOTONIC, &now);

    const double whole = now.tv_sec;
    const double frac = now.tv_nsec * 1e-9;
    return whole + frac;
}

/*
 * Run one write benchmark and print its result.
 *
 * Creates a scratch file under /tmp, then issues OPS writes of BS bytes each
 * at consecutive offsets through io_uring, strictly one at a time (submit one
 * SQE, wait for its CQE). Prints elapsed seconds and IOPS to stdout.
 *
 * label      - name used in the result line and in error messages
 * use_direct - non-zero: open the scratch file with O_DIRECT
 * use_reg    - non-zero: register the write buffer with the ring and issue
 *              fixed-buffer writes against it
 *
 * Returns 0 on success and 1 on any failure (diagnostic goes to stderr).
 * Once the file has been opened, it is removed again on every exit path.
 */
static int run_bench(const char *label, int use_direct, int use_reg) {
    const char *path = "/tmp/iouring_odirect_bench.dat";
    struct io_uring ring;
    void *buf = NULL;
    double t0, dt;
    int rc = 1;
    int ret;

    /* Best effort: drop leftovers from an earlier run; failure is harmless. */
    unlink(path);

    int flags = O_RDWR | O_CREAT | O_TRUNC;
    if (use_direct)
        flags |= O_DIRECT;
    int fd = open(path, flags, 0644);
    if (fd < 0) {
        perror("open");
        return 1;
    }

    /* Pre-size the file so every write lands inside the existing extent. */
    if (ftruncate(fd, (off_t)BS * OPS) != 0) {
        perror("ftruncate");
        goto out_close;
    }

    /* BS-aligned buffer; alignment is what O_DIRECT writes require. */
    ret = posix_memalign(&buf, BS, BS);
    if (ret != 0) {
        /* posix_memalign returns the error number and leaves errno alone. */
        fprintf(stderr, "posix_memalign: %s\n", strerror(ret));
        goto out_close;
    }
    memset(buf, 0xAB, BS);

    /* liburing calls report failure as a negative errno value. */
    ret = io_uring_queue_init(256, &ring, 0);
    if (ret < 0) {
        fprintf(stderr, "io_uring_queue_init: %s\n", strerror(-ret));
        goto out_free;
    }

    if (use_reg) {
        struct iovec iov = { .iov_base = buf, .iov_len = BS };
        ret = io_uring_register_buffers(&ring, &iov, 1);
        if (ret < 0) {
            fprintf(stderr, "io_uring_register_buffers: %s\n", strerror(-ret));
            goto out_ring;
        }
    }

    t0 = now_sec();
    for (int i = 0; i < OPS; i++) {
        off_t off = (off_t)i * BS;

        struct io_uring_sqe *sqe = io_uring_get_sqe(&ring);
        if (sqe == NULL) {
            fprintf(stderr, "%s: no free submission queue entry\n", label);
            goto out_ring;
        }

        if (use_reg) {
            /*
             * Registered buffers are only used by the *_fixed opcodes; the
             * address must lie inside the registered region at index 0.
             */
            io_uring_prep_write_fixed(sqe, fd, buf, BS, off, 0);
        } else {
            io_uring_prep_write(sqe, fd, buf, BS, off);
        }

        ret = io_uring_submit(&ring);
        if (ret < 0) {
            fprintf(stderr, "%s: io_uring_submit: %s\n", label, strerror(-ret));
            goto out_ring;
        }

        struct io_uring_cqe *cqe = NULL;
        ret = io_uring_wait_cqe(&ring, &cqe);
        if (ret < 0) {
            fprintf(stderr, "%s: io_uring_wait_cqe: %s\n", label, strerror(-ret));
            goto out_ring;
        }

        /* Copy the result out before handing the CQE back to the ring. */
        int res = cqe->res;
        io_uring_cqe_seen(&ring, cqe);

        if (res < 0) {
            fprintf(stderr, "%s err %d\n", label, res);
            goto out_ring;
        }
        if (res != BS) {
            /* A partial write would make the IOPS figure meaningless. */
            fprintf(stderr, "%s short write %d\n", label, res);
            goto out_ring;
        }
    }
    dt = now_sec() - t0;
    printf("%s: %.3f s, %.0f IOPS\n", label, dt, OPS / dt);
    rc = 0;

    /* Tear down in reverse order of acquisition. */
out_ring:
    io_uring_queue_exit(&ring);
out_free:
    free(buf);
out_close:
    close(fd);
    unlink(path);
    return rc;
}

/*
 * Run the three benchmark variants in order.
 *
 * The buffered and plain O_DIRECT runs are mandatory: if either fails the
 * program exits with status 1. A failure of the register_buffers run is only
 * reported on stderr (the message refers to the accompanying article) and the
 * program still exits with status 0.
 */
int main(void) {
    static const struct {
        const char *label;
        int direct;
    } required[] = {
        { "buffered io_uring write", 0 },
        { "O_DIRECT io_uring write", 1 },
    };

    for (size_t k = 0; k < sizeof required / sizeof required[0]; k++) {
        if (run_bench(required[k].label, required[k].direct, 0) != 0)
            return 1;
    }

    int reg_status = run_bench("O_DIRECT + register_buffers", 1, 1);
    if (reg_status != 0)
        fprintf(stderr, "register_buffers path failed (见文章说明)\n");

    return 0;
}
