Ядро: 5.15.0-70-generic.
Раньше (параметр is_remap=0) я в подобной задаче при получении входного запроса bio формировал свой запрос bio к вышестоящему устройству, и всё работало, но медленно: скорость записи на флешку была ~460 КБ/с. Потом я решил пробрасывать запрос bio вышестоящему устройству напрямую (is_remap=1). Если при этом не пытаться модифицировать данные, то всё работает, и скорость возрастает до 1,8 МБ/с, т. е. примерно в 4 раза. Но если начать модифицировать данные (а это нужно), то работает только запись. При чтении dd получает нерасшифрованные данные, а bio в stackbd_end_io_read_cloned (перед этим клонированный с помощью bio_clone_fast в stackbd_io_fn_remap) вообще имеет нулевой размер. При этом размер obio ненулевой. Как такое вообще происходит, и как сделать правильно?
Интересно, что если в stackbd_end_io_read_cloned менять данные после вызова bio_endio, то в dd прилетают расшифрованные данные, но чую, что так делать неправильно. Это подтверждается тем, что fsck после mkfs вешает систему.
Вот, например, я читаю сектор:
user@astra-1:~/git/stackbd/module$ sudo dd if=/dev/stackbd0 count=1 | hexdump -C
00000000 63 d0 18 e5 e3 ee fb a6 ee e9 fc 88 8a a8 a8 88 |c...............|
00000010 8a 88 88 88 88 70 88 88 98 88 8c 88 88 88 88 88 |.....p..........|
00000020 88 48 26 8b 88 b3 88 88 88 88 88 88 8a 88 88 88 |.H&.............|
00000030 89 88 8e 88 88 88 88 88 88 88 88 88 88 88 88 88 |................|
00000040 08 88 a1 57 55 08 9b c6 c7 a8 c6 c9 c5 cd a8 a8 |...WU...........|
00000050 a8 a8 ce c9 dc bb ba a8 a8 a8 86 97 36 ff f4 24 |............6..$|
00000060 aa 48 fc 83 de 3c 86 33 8f 88 45 98 d6 63 78 ba |.H...<.3..E..cx.|
00000070 6c 45 9e 45 91 63 76 dc e0 e1 fb a8 e1 fb a8 e6 |lE.E.cv.........|
00000080 e7 fc a8 e9 a8 ea e7 e7 fc e9 ea e4 ed a8 ec e1 |................|
00000090 fb e3 a6 a8 a8 d8 e4 ed e9 fb ed a8 e1 e6 fb ed |................|
000000a0 fa fc a8 e9 a8 ea e7 e7 fc e9 ea e4 ed a8 ee e4 |................|
000000b0 e7 f8 f8 f1 a8 e9 e6 ec 85 82 f8 fa ed fb fb a8 |................|
000000c0 e9 e6 f1 a8 e3 ed f1 a8 fc e7 a8 fc fa f1 a8 e9 |................|
000000d0 ef e9 e1 e6 a8 a6 a6 a6 a8 85 82 88 88 88 88 88 |................|
000000e0 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 88 |................|
*
000001f0 88 88 88 88 88 88 88 88 88 88 88 88 88 88 dd 22 |..............."|
1+0 записей получено
1+0 записей отправлено
512 байт скопировано, 0,0063565 s, 80,5 kB/s
00000200
user@astra-1:~/git/stackbd/module$
И вот, что вижу в логе:
Jun 9 15:24:11 astra-1 kernel: stackbd [task=00000000c60564d5] stackbd_io_fn_remap: HIT.r.1
Jun 9 15:24:11 astra-1 kernel: debugbd [task=00000000c60564d5] debugbd_submit_bio: debugbd: make request read block 0 #pages 0 total-size 16384
Jun 9 15:24:11 astra-1 kernel: stackbd [task=00000000c60564d5] stackbd_io_fn_remap: HIT.r.2
Jun 9 15:24:11 astra-1 kernel: stackbd [task=0000000089abc07d] stackbd_end_io_read_cloned: HIT.1
Jun 9 15:24:11 astra-1 kernel: stackbd [task=0000000089abc07d] stackbd_end_io_read_cloned: HIT.2: obio.size=16384; bio.size=0
Jun 9 15:24:11 astra-1 kernel: stackbd [task=0000000089abc07d] stackbd_end_io_read_cloned: HIT.3
Jun 9 15:24:11 astra-1 kernel: stackbd [task=0000000089abc07d] stackbd_end_io_read_cloned: HIT.4
Jun 9 15:24:11 astra-1 kernel: stackbd [task=00000000c60564d5] stackbd_io_fn_remap: HIT.r.1
Jun 9 15:24:11 astra-1 kernel: debugbd [task=00000000c60564d5] debugbd_submit_bio: debugbd: make request read block 32 #pages 0 total-size 32768
Jun 9 15:24:11 astra-1 kernel: stackbd [task=00000000c60564d5] stackbd_io_fn_remap: HIT.r.2
Jun 9 15:24:11 astra-1 kernel: stackbd [task=0000000089abc07d] stackbd_end_io_read_cloned: HIT.1
Jun 9 15:24:11 astra-1 kernel: stackbd [task=0000000089abc07d] stackbd_end_io_read_cloned: HIT.2: obio.size=32768; bio.size=0
Jun 9 15:24:11 astra-1 kernel: stackbd [task=0000000089abc07d] stackbd_end_io_read_cloned: HIT.3
debugbd - это тот же драйвер, только выводящий информацию о запросах, для отладки.
Исходный код драйвера stackbd:
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/init.h>
#include <linux/version.h>
#include <linux/kernel.h> // printk()
#include <linux/fs.h> // everything...
#include <linux/errno.h> // error codes
#include <linux/types.h> // size_t
#include <linux/vmalloc.h>
#include <linux/genhd.h>
#include <linux/blkdev.h>
#include <linux/hdreg.h>
#include <linux/kthread.h>
#include <trace/events/block.h>
#include "logging.h"
#include "../common/stackbd.h"
/* Mode used to open the backing device: read + write + exclusive. */
#define STACKBD_BDEV_MODE (FMODE_READ | FMODE_WRITE | FMODE_EXCL)
/* Sector numbers in this file are counted in units of 1 << 9 = 512 bytes. */
#define KERNEL_SECTOR_SHIFT 9
#define KERNEL_SECTOR_SIZE (1 << KERNEL_SECTOR_SHIFT)
/* Thin aliases for the bio segment iteration types. */
#define DECLARE_BIO_VEC struct bio_vec
#define ACCESS_BIO_VEC(x) (x)
#define DECLARE_BVEC_ITER struct bvec_iter
/*
 * bio field accessors.  Each expansion is a single parenthesised expression
 * without a trailing semicolon, so the macros are safe inside larger
 * expressions and in brace-less if/else bodies.
 */
#define BIO_SET_SECTOR(bio, sec) ((bio)->bi_iter.bi_sector = (sec))
#define BIO_GET_SECTOR(bio) ((bio)->bi_iter.bi_sector)
#define BIO_GET_SIZE(bio) ((bio)->bi_iter.bi_size)
#define BIO_SET_BDEV(bio, bdev) bio_set_dev((bio), (bdev))
/* printk conversion used for sector_t values. */
#define SEC_FMT "llu"
MODULE_LICENSE("Dual BSD/GPL");

/* Block major to register; 0 asks register_blkdev() to pick one. */
static int major_num;
module_param(major_num, int, 0);

/* Logical block size in bytes; only stackbd_getgeo() reads it in this file. */
static int LOGICAL_BLOCK_SIZE = 512;
module_param(LOGICAL_BLOCK_SIZE, int, 0);

/* Selects the I/O handler in stackbd_init(): remap (true) or copy (false). */
static bool is_remap;
module_param(is_remap, bool, 0);
/* State of the backing ("target") block device that requests are forwarded to. */
typedef struct
{
char path[PATH_MAX]; /* device path, filled in by stackbd_start() */
fmode_t mode; // open mode handed to blkdev_get_by_path()/blkdev_put(); original note: used in aldcc_start / aldcc_stop
bool is_bdev_raw_ok; /* true only while bdev_raw refers to a successfully opened device */
struct block_device *bdev_raw; /* result of blkdev_get_by_path(); trust it only when is_bdev_raw_ok is set */
} stackbd_target_t;
/*
* The internal representation of our device.
* There is exactly one instance, the file-scope variable `stackbd`.
*/
static struct stackbd_t {
sector_t capacity; /* Sectors; taken from the backing device size in stackbd_start() */
struct gendisk *gd; /* our disk, allocated in stackbd_init() */
spinlock_t lock; /* taken around every bio_list access in this file */
struct bio_list bio_list; /* bios queued by stackbd_submit_bio() for the worker thread */
struct task_struct *thread; /* worker running stackbd_threadfn() */
int is_active; /* set to 1 by stackbd_start() once the worker thread exists */
stackbd_target_t tgt; /* backing device state */
/* Our request queue */
struct request_queue *queue; /* copy of gd->queue */
} stackbd;
/* The worker thread sleeps here; stackbd_submit_bio() wakes it. */
static DECLARE_WAIT_QUEUE_HEAD(req_event);

/* Signature of a per-bio handler executed by the worker thread. */
typedef void (* t_stackbd_io_fn)(struct bio *);

/* Handler chosen once in stackbd_init() from the is_remap parameter. */
static t_stackbd_io_fn p_stackbd_io_fn;

/* bio_set that stackbd_io_fn_remap() clones from. */
static struct bio_set bs;
int buffer_read(
struct stackbd_t *dev,
unsigned long sector,
unsigned long nsect,
char *buffer
)
{
int result = 0;
unsigned nsize = nsect << KERNEL_SECTOR_SHIFT;
int npages = ((nsize - 1) >> PAGE_SHIFT) + 1;
struct bio *bio;
struct block_device *bdev = dev->tgt.bdev_raw;
//PINFO("begin; sector=%ld; nsect=%ld; buffer=%p\n", sector, nsect, buffer);
if(unlikely(!dev->tgt.is_bdev_raw_ok))
{
PERROR("bdev is NULL!\n");
result = -EFAULT;
goto out;
}
bio = bio_alloc(GFP_NOIO, npages);
if(unlikely(!bio))
{
PERROR("bio_alloc failed!\n");
result = -ENOMEM;
goto out;
}
BIO_SET_BDEV(bio, bdev);
BIO_SET_SECTOR(bio, sector);
bio_set_op_attrs(bio, REQ_OP_READ, REQ_PREFLUSH);
{
char *ptr = buffer;
do
{
struct page *page;
page = virt_to_page(ptr);
if(unlikely(!page))
{
PERROR("virt_to_page failed!\n");
result = -ENOMEM;
break;
}
{
unsigned op = offset_in_page(ptr);
unsigned this_step = min((unsigned)(PAGE_SIZE - op), nsize);
bio_add_page(bio, page, this_step, op);
nsize -= this_step;
ptr += this_step;
}
} while(nsize > 0);
if(likely(!result))
{
result = submit_bio_wait(bio);
}
bio_put(bio);
}
out:
//PINFO("end (%d)\n", result);
return result;
}
int buffer_write(
struct stackbd_t *dev,
unsigned long sector,
unsigned long nsect,
char *buffer
)
{
int result = 0;
unsigned nsize = nsect << KERNEL_SECTOR_SHIFT;
int npages = ((nsize - 1) >> PAGE_SHIFT) + 1;
struct bio *bio;
struct block_device *bdev = dev->tgt.bdev_raw;
//PINFO("begin; sector=%ld; nsect=%ld; buffer=%p\n", sector, nsect, buffer);
if(unlikely(!dev->tgt.is_bdev_raw_ok))
{
PERROR("bdev is NULL!\n");
result = -EFAULT;
goto out;
}
bio = bio_alloc(GFP_NOIO, npages);
if(unlikely(!bio))
{
PERROR("bio_alloc failed!\n");
result = -ENOMEM;
goto out;
}
BIO_SET_BDEV(bio, bdev);
BIO_SET_SECTOR(bio, sector);
bio_set_op_attrs(bio, REQ_OP_WRITE, REQ_PREFLUSH);
{
char *ptr = buffer;
do
{
struct page *page = virt_to_page(ptr);
if(unlikely(!page))
{
PERROR("alloc page failed!\n");
result = -ENOMEM;
break;
}
{
unsigned op = offset_in_page(ptr);
unsigned this_step = min((unsigned)(PAGE_SIZE - op), nsize);
bio_add_page(bio, page, this_step, op);
nsize -= this_step;
ptr += this_step;
}
} while(nsize > 0);
if(likely(!result))
{
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 8, 0)
result = submit_bio_wait(bio);
#else
result = submit_bio_wait(WRITE | REQ_FLUSH, bio);
#endif
}
bio_put(bio);
}
out:
//PINFO("end (%d)\n", result);
return result;
}
/*
 * stackbd_end_io_read_cloned - completion handler for the clones submitted
 * by stackbd_io_fn_remap().
 * @bio: the completed clone; bio->bi_private is the original bio
 *
 * For reads the payload is XOR-decoded here, before the original bio is
 * ended, because after bio_endio() the data belongs to the submitter again.
 *
 * The segments are walked through the ORIGINAL bio.  When this handler runs
 * the clone's bi_iter has already been consumed (its bi_size is 0, which is
 * what the "HIT.2" message reports), so iterating the clone visits nothing.
 * The original's bi_iter was never advanced and both bios reference the same
 * pages, so iterating the original reaches exactly the data that was read.
 *
 * The clone's completion status is handed on to the original so that an
 * error on the backing device is not reported as success.
 */
static void stackbd_end_io_read_cloned(struct bio *bio)
{
    struct bio *obio = bio->bi_private;

    PINFO("HIT.1");

    if (bio->bi_status && !obio->bi_status)
    {
        obio->bi_status = bio->bi_status;
    }

    if (bio_data_dir(bio) == READ)
    {
        PINFO("HIT.2: obio.size=%u; bio.size=%u", BIO_GET_SIZE(obio), BIO_GET_SIZE(bio));

        /* Only decode data that was actually read. */
        if (!bio->bi_status)
        {
            DECLARE_BIO_VEC bvec;
            DECLARE_BVEC_ITER iter;

            bio_for_each_segment(bvec, obio, iter)
            {
                /*
                 * NOTE(review): page_address() presumes the page has a
                 * permanent kernel mapping (no highmem) - confirm, or map
                 * the page explicitly.
                 */
                char *p = page_address(ACCESS_BIO_VEC(bvec).bv_page) + ACCESS_BIO_VEC(bvec).bv_offset;
                int len = ACCESS_BIO_VEC(bvec).bv_len;
                int i;

                print_hex_dump(KERN_INFO, "readed data (1-st 16 bytes) ", DUMP_PREFIX_OFFSET, 16, 1, p, 16, false);
                for (i = 0; i < len; i++)
                {
                    *p++ ^= 0x88;
                }
            }
        }
        PINFO("HIT.3");
    }

    /* Drop the clone first, then complete the request it stood in for. */
    bio_put(bio);
    bio_endio(obio);
    PINFO("HIT.4");
}
/*
 * stackbd_io_fn_remap - forward a bio to the backing device through a clone.
 * @bio: request taken from the worker queue
 *
 * The clone is pointed at stackbd.tgt.bdev_raw and completes through
 * stackbd_end_io_read_cloned(), which ends @bio.  Reads are submitted
 * untouched (decoding happens on completion); writes are XOR-encoded first.
 *
 * NOTE(review): bio_clone_fast() shares the original's bio_vec table, so the
 * XOR below rewrites the submitter's own pages in place and they stay
 * encoded after the write.  Encoding into separately allocated pages is
 * probably required - confirm.
 */
static void stackbd_io_fn_remap(struct bio *bio)
{
    DECLARE_BIO_VEC bvec;
    DECLARE_BVEC_ITER iter;
    struct bio *cbio = bio_clone_fast(bio, GFP_NOIO, &bs);

    if (unlikely(!cbio))
    {
        PERROR("bio_clone_fast failed!\n");
        bio_io_error(bio);
        return;
    }

    BIO_SET_BDEV(cbio, stackbd.tgt.bdev_raw);
    cbio->bi_end_io = stackbd_end_io_read_cloned;
    cbio->bi_private = bio;

    if (bio_data_dir(bio) == READ)
    {
        PINFO("HIT.r.1");
        submit_bio_noacct(cbio);
        PINFO("HIT.r.2");
        return;
    }

    PINFO("HIT.w.1");
    bio_for_each_segment(bvec, cbio, iter)
    {
        /* Keep the segment start: the dump must not read past the segment. */
        char *start = page_address(ACCESS_BIO_VEC(bvec).bv_page) + ACCESS_BIO_VEC(bvec).bv_offset;
        char *p = start;
        int len = ACCESS_BIO_VEC(bvec).bv_len;
        int i;

        for (i = 0; i < len; i++)
        {
            *p++ ^= 0x88;
        }
        print_hex_dump(KERN_INFO, "writed data (1-st 16 bytes) ", DUMP_PREFIX_OFFSET, 16, 1, start, 16, false);
    }
    PINFO("HIT.w.2");
    submit_bio_noacct(cbio);
    PINFO("HIT.w.3");
}
/*
 * my_bio_complete - finish @bio according to an errno-style result.
 * @bio: request to complete
 * @ret: 0 ends the bio normally; any other value ends it via bio_io_error()
 */
static void my_bio_complete(struct bio *bio, int ret)
{
    if (!ret)
    {
        bio_endio(bio);
        return;
    }
    bio_io_error(bio);
}
static void stackbd_io_fn_clone(struct bio *bio)
{
int res;
DECLARE_BIO_VEC bvec;
DECLARE_BVEC_ITER iter;
sector_t sector = BIO_GET_SECTOR(bio);
int size = BIO_GET_SIZE(bio);
int nsect = size >> KERNEL_SECTOR_SHIFT;
char *src, *p;
do
{
if (bio_data_dir(bio) == READ)
{
p = src = kmalloc(size, GFP_KERNEL);
if (!src)
{
PERROR("Unable to allocate read buffer!\n");
res = -ENOMEM;
break;
}
do
{
res = buffer_read(&stackbd, sector, nsect, src);
if (unlikely(res))
{
PERROR("i/o error while read!\n");
break;
}
bio_for_each_segment(bvec, bio, iter)
{
char *dst = page_address(ACCESS_BIO_VEC(bvec).bv_page) + ACCESS_BIO_VEC(bvec).bv_offset;
int len = ACCESS_BIO_VEC(bvec).bv_len;
memcpy(dst, p, len);
p += len;
}
}
while (0);
}
else
{
p = src = kmalloc(size, GFP_KERNEL);
if (!src)
{
PERROR("Unable to allocate write buffer!\n");
res = -ENOMEM;
break;
}
bio_for_each_segment(bvec, bio, iter)
{
char *dst = page_address(ACCESS_BIO_VEC(bvec).bv_page) + ACCESS_BIO_VEC(bvec).bv_offset;
int len = ACCESS_BIO_VEC(bvec).bv_len;
memcpy(p, dst, len);
p += len;
}
res = buffer_write(&stackbd, sector, nsect, src);
if (unlikely(res))
{
PERROR("i/o error while write!\n");
}
}
kfree(src);
}
while (0);
my_bio_complete(bio, res);
} // stackbd_io_fn_clone
static int stackbd_threadfn(void *data)
{
struct bio *bio;
set_user_nice(current, -20);
while (!kthread_should_stop())
{
/* wake_up() is after adding bio to list. No need for condition */
wait_event_interruptible(req_event, kthread_should_stop() ||
!bio_list_empty(&stackbd.bio_list));
spin_lock_irq(&stackbd.lock);
if (bio_list_empty(&stackbd.bio_list))
{
spin_unlock_irq(&stackbd.lock);
continue;
}
bio = bio_list_pop(&stackbd.bio_list);
spin_unlock_irq(&stackbd.lock);
p_stackbd_io_fn(bio);
}
return 0;
}
// Handle an I/O request.
static blk_qc_t stackbd_submit_bio(struct bio *bio)
{
/*PINFO("stackbd: make request %-5s block %-12" SEC_FMT " #pages %-4hu total-size %-10u\n",
bio_data_dir(bio) == WRITE ? "write" : "read",
BIO_GET_SECTOR(bio),
bio->bi_vcnt,
BIO_GET_SIZE(bio)
);*/
spin_lock_irq(&stackbd.lock);
if (!stackbd.tgt.bdev_raw)
{
PERROR("Request before bdev_raw is ready, aborting\n");
goto abort;
}
if (!stackbd.is_active)
{
PERROR("Device not active yet, aborting\n");
goto abort;
}
bio_list_add(&stackbd.bio_list, bio);
wake_up(&req_event);
spin_unlock_irq(&stackbd.lock);
goto exit;
abort:
spin_unlock_irq(&stackbd.lock);
PERROR("<%p> Abort request\n", bio);
bio_io_error(bio);
exit:
return BLK_QC_T_NONE;
}
/*
 * stackbd_target_open - open the backing device named by p_tdev->path.
 * @p_tdev: target state; path and mode are read, bdev_raw and
 *          is_bdev_raw_ok are written
 *
 * Returns 0 on success or the negative errno carried by the pointer that
 * blkdev_get_by_path() returned.  On failure bdev_raw is left NULL rather
 * than holding an error pointer, so plain NULL checks elsewhere stay valid.
 */
static int stackbd_target_open(stackbd_target_t *p_tdev)
{
    char *path = p_tdev->path;
    struct block_device *bdev_raw;
    int res = 0;

    PINFO("Open %s\n", path);

    bdev_raw = blkdev_get_by_path(path, p_tdev->mode, p_tdev);
    if (unlikely(IS_ERR(bdev_raw)))
    {
        res = PTR_ERR(bdev_raw);
        PINFO("error opening raw device %s <%d>\n", path, res);
        bdev_raw = NULL;
    }

    p_tdev->bdev_raw = bdev_raw;
    p_tdev->is_bdev_raw_ok = !res;
    return res;
}
/*
 * stackbd_target_close - release the backing device if it is open.
 * @p_tdev: target state; nothing happens unless is_bdev_raw_ok is set
 */
static void stackbd_target_close(stackbd_target_t *p_tdev)
{
    if (!p_tdev->is_bdev_raw_ok)
    {
        return;
    }

    blkdev_put(p_tdev->bdev_raw, p_tdev->mode);
    p_tdev->is_bdev_raw_ok = false;
    p_tdev->bdev_raw = NULL;
}
static int stackbd_start(char dev_path[])
{
unsigned max_sectors;
sector_t lba;
stackbd_target_t *p_tgt = &stackbd.tgt;
strcpy(p_tgt->path, dev_path);
p_tgt->mode = STACKBD_BDEV_MODE;
if(stackbd_target_open(p_tgt) < 0)
{
PERROR("Error while stackbd_target_open(..)!");
return -EFAULT;
}
/* Set up our internal device */
lba = i_size_read(p_tgt->bdev_raw->bd_inode) >> KERNEL_SECTOR_SHIFT;
stackbd.capacity = lba;//get_capacity(stackbd.bdev_raw->bd_disk);
PINFO("Device real capacity: %" SEC_FMT "\n", stackbd.capacity);
set_capacity(stackbd.gd, stackbd.capacity);
max_sectors = queue_max_hw_sectors(bdev_get_queue(p_tgt->bdev_raw));
blk_queue_max_hw_sectors(stackbd.queue, max_sectors);
PINFO("Max sectors: %u\n", max_sectors);
stackbd.thread = kthread_create(stackbd_threadfn, NULL,
stackbd.gd->disk_name);
if (IS_ERR(stackbd.thread))
{
PERROR("error kthread_create <%lu>\n", PTR_ERR(stackbd.thread));
goto error_after_bdev;
}
PINFO("done initializing successfully\n");
stackbd.is_active = 1;
wake_up_process(stackbd.thread);
return 0;
error_after_bdev:
stackbd_target_close(p_tgt);
return -EFAULT;
}
/*
 * stackbd_ioctl - device-specific ioctl handler.
 * @bdev: unused
 * @mode: unused
 * @cmd:  only STACKBD_DO_IT is handled
 * @arg:  for STACKBD_DO_IT, user pointer to a buffer of sizeof(dev_path)
 *        bytes holding the backing device path
 *
 * Returns the result of stackbd_start() for STACKBD_DO_IT, -EFAULT when the
 * user buffer cannot be read, and -ENOTTY for any other command.
 */
static int stackbd_ioctl(struct block_device *bdev, fmode_t mode,
                         unsigned int cmd, unsigned long arg)
{
    char dev_path[80];
    void __user *argp = (void __user *)arg;

    switch (cmd)
    {
    case STACKBD_DO_IT:
        PINFO("\n*** DO IT!!!!!!! ***\n\n");
        if (copy_from_user(dev_path, argp, sizeof(dev_path)))
            return -EFAULT;
        /*
         * User space is not obliged to terminate the string; without this
         * the string handling in stackbd_start() could run off the buffer.
         */
        dev_path[sizeof(dev_path) - 1] = '\0';
        return stackbd_start(dev_path);
    default:
        return -ENOTTY;
    }
}
/*
 * The HDIO_GETGEO ioctl is handled in blkdev_ioctl(), which
 * calls this. We need to implement getgeo, since we can't
 * use tools such as fdisk to partition the drive otherwise.
 *
 * The device has no real geometry, so a made-up one is reported:
 * 4 heads and 16 sectors per track, i.e. 64 sectors per cylinder.
 * Always returns 0.
 */
int stackbd_getgeo(struct block_device * block_device, struct hd_geometry * geo)
{
    long total = stackbd.capacity * (LOGICAL_BLOCK_SIZE / KERNEL_SECTOR_SIZE);

    geo->start = 0;
    geo->sectors = 16;
    geo->heads = 4;
    /* Drop the partial cylinder, then divide by the 64 sectors per cylinder. */
    geo->cylinders = (total & ~0x3f) >> 6;
    return 0;
}
/*
 * Block device callbacks of the stackbd disk; installed as gd->fops in
 * stackbd_init().
 */
static struct block_device_operations stackbd_ops = {
    .owner      = THIS_MODULE,
    .ioctl      = stackbd_ioctl,
    .getgeo     = stackbd_getgeo,
    .submit_bio = stackbd_submit_bio,
};
/*
 * stackbd_init - module entry point.
 *
 * Picks the bio handler from the is_remap parameter, registers the block
 * major, allocates and fills in the gendisk, initialises the bio_set used
 * for clones and finally adds the disk.
 *
 * Every failure unwinds exactly what was set up before it and returns the
 * error of the step that failed.  The unwind never touches the request
 * queue before the disk exists, and a later failure releases the disk and
 * the bio_set instead of leaking them.
 */
static int __init stackbd_init(void)
{
    int ret;

    PINFO("is_remap=%d\n", is_remap);
    p_stackbd_io_fn = is_remap ? stackbd_io_fn_remap : stackbd_io_fn_clone;

    /* Set up our internal device */
    spin_lock_init(&stackbd.lock);

    /*
     * Get registered.  register_blkdev() returns the allocated major only
     * when it was asked for major 0; for an explicit major it returns 0 on
     * success, so major_num must not be overwritten in that case.
     */
    ret = register_blkdev(major_num, STACKBD_NAME);
    if (ret < 0)
    {
        PERROR("unable to get major number\n");
        return ret;
    }
    if (ret > 0)
    {
        major_num = ret;
    }

    /* Gendisk structure */
    stackbd.gd = blk_alloc_disk(NUMA_NO_NODE);
    if (!stackbd.gd)
    {
        PERROR("unable to alloc disk\n");
        ret = -ENOMEM;
        goto err_unregister_blkdev;
    }
    stackbd.gd->major = major_num;
    stackbd.gd->first_minor = 0;
    stackbd.gd->minors = 1 << 4;
    stackbd.gd->fops = &stackbd_ops;
    stackbd.gd->private_data = &stackbd;
    strcpy(stackbd.gd->disk_name, STACKBD_NAME_0);
    stackbd.queue = stackbd.gd->queue;

    ret = bioset_init(&bs, 64, 0, BIOSET_NEED_BVECS);
    if (ret < 0)
    {
        PERROR( "Cannot allocate bioset");
        goto err_cleanup_disk;
    }

    ret = add_disk(stackbd.gd);
    if (ret < 0)
    {
        PERROR("unable to add disk\n");
        goto err_bioset_exit;
    }

    PINFO("init done\n");
    return 0;

err_bioset_exit:
    bioset_exit(&bs);
err_cleanup_disk:
    /* Counterpart of blk_alloc_disk(): releases the queue and the disk. */
    blk_cleanup_disk(stackbd.gd);
    stackbd.gd = NULL;
    stackbd.queue = NULL;
err_unregister_blkdev:
    unregister_blkdev(major_num, STACKBD_NAME);
    return ret;
}
/*
 * stackbd_exit - module exit: undo stackbd_init() and stackbd_start().
 *
 * The disk is removed before the worker thread is stopped, because only the
 * worker completes bios queued by stackbd_submit_bio().
 * NOTE(review): del_gendisk() may still issue I/O while it syncs the device;
 * with the worker already gone such I/O would never complete - confirm.
 *
 * The queue and the disk are then released together by blk_cleanup_disk(),
 * the counterpart of blk_alloc_disk().
 */
static void __exit stackbd_exit(void)
{
    PINFO("exit\n");

    del_gendisk(stackbd.gd);

    if (stackbd.is_active)
    {
        kthread_stop(stackbd.thread);
        stackbd.is_active = 0;
        stackbd_target_close(&stackbd.tgt);
    }

    blk_cleanup_disk(stackbd.gd);
    bioset_exit(&bs);
    unregister_blkdev(major_num, STACKBD_NAME);
}
/* Register stackbd_init()/stackbd_exit() as the module load and unload hooks. */
module_init(stackbd_init);
module_exit(stackbd_exit);
https://github.com/zenbooster/stackbd/blob/5.15.0-70-generic/module/main.c