T3Q was born on a highway. That's where most accidents happen.

This commit is contained in:
Raziel K. Crowe 2022-03-22 19:03:46 +05:00
parent b8e28f2bc3
commit e770879906
27 changed files with 476 additions and 141 deletions

View File

@ -61,6 +61,7 @@
#include <linux/hdreg.h>
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/major.h>
#include <linux/mutex.h>
#include <linux/fs.h>
#include <linux/blk-mq.h>

View File

@ -68,6 +68,7 @@
#include <linux/delay.h>
#include <linux/init.h>
#include <linux/blk-mq.h>
#include <linux/major.h>
#include <linux/mutex.h>
#include <linux/completion.h>
#include <linux/wait.h>

View File

@ -184,6 +184,7 @@ static int print_unex = 1;
#include <linux/ioport.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/major.h>
#include <linux/platform_device.h>
#include <linux/mod_devicetable.h>
#include <linux/mutex.h>

View File

@ -16,6 +16,7 @@
#include <linux/fd.h>
#include <linux/slab.h>
#include <linux/blk-mq.h>
#include <linux/major.h>
#include <linux/mutex.h>
#include <linux/hdreg.h>
#include <linux/kernel.h>

View File

@ -859,9 +859,15 @@ static int virtblk_probe(struct virtio_device *vdev)
virtio_cread(vdev, struct virtio_blk_config, max_discard_seg,
&v);
/*
* max_discard_seg == 0 is out of spec but we always
* handled it.
*/
if (!v)
v = sg_elems - 2;
blk_queue_max_discard_segments(q,
min_not_zero(v,
MAX_DISCARD_SEGMENTS));
min(v, MAX_DISCARD_SEGMENTS));
blk_queue_flag_set(QUEUE_FLAG_DISCARD, q);
}

View File

@ -42,6 +42,7 @@
#include <linux/cdrom.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/major.h>
#include <linux/mutex.h>
#include <linux/scatterlist.h>
#include <linux/bitmap.h>

View File

@ -749,7 +749,6 @@ static const struct acpi_device_id tgl_pinctrl_acpi_match[] = {
{ "INT34C5", (kernel_ulong_t)&tgllp_soc_data },
{ "INT34C6", (kernel_ulong_t)&tglh_soc_data },
{ "INTC1055", (kernel_ulong_t)&tgllp_soc_data },
{ "INTC1057", (kernel_ulong_t)&tgllp_soc_data },
{ }
};
MODULE_DEVICE_TABLE(acpi, tgl_pinctrl_acpi_match);

View File

@ -14,6 +14,7 @@
#define KMSG_COMPONENT "dasd"
#include <linux/interrupt.h>
#include <linux/major.h>
#include <linux/fs.h>
#include <linux/blkpg.h>

View File

@ -585,6 +585,12 @@ static int rockchip_spi_slave_abort(struct spi_controller *ctlr)
{
struct rockchip_spi *rs = spi_controller_get_devdata(ctlr);
if (atomic_read(&rs->state) & RXDMA)
dmaengine_terminate_sync(ctlr->dma_rx);
if (atomic_read(&rs->state) & TXDMA)
dmaengine_terminate_sync(ctlr->dma_tx);
atomic_set(&rs->state, 0);
spi_enable_chip(rs, false);
rs->slave_abort = true;
spi_finalize_current_transfer(ctlr);
@ -654,7 +660,7 @@ static int rockchip_spi_probe(struct platform_device *pdev)
struct spi_controller *ctlr;
struct resource *mem;
struct device_node *np = pdev->dev.of_node;
u32 rsd_nsecs;
u32 rsd_nsecs, num_cs;
bool slave_mode;
slave_mode = of_property_read_bool(np, "spi-slave");
@ -764,8 +770,9 @@ static int rockchip_spi_probe(struct platform_device *pdev)
* rk spi0 has two native cs, spi1..5 one cs only
* if num-cs is missing in the dts, default to 1
*/
if (of_property_read_u16(np, "num-cs", &ctlr->num_chipselect))
ctlr->num_chipselect = 1;
if (of_property_read_u32(np, "num-cs", &num_cs))
num_cs = 1;
ctlr->num_chipselect = num_cs;
ctlr->use_gpio_descriptors = true;
}
ctlr->dev.of_node = pdev->dev.of_node;

View File

@ -166,14 +166,13 @@ void virtio_add_status(struct virtio_device *dev, unsigned int status)
}
EXPORT_SYMBOL_GPL(virtio_add_status);
int virtio_finalize_features(struct virtio_device *dev)
/* Do some validation, then set FEATURES_OK */
static int virtio_features_ok(struct virtio_device *dev)
{
int ret = dev->config->finalize_features(dev);
unsigned status;
int ret;
might_sleep();
if (ret)
return ret;
ret = arch_has_restricted_virtio_memory_access();
if (ret) {
@ -202,7 +201,6 @@ int virtio_finalize_features(struct virtio_device *dev)
}
return 0;
}
EXPORT_SYMBOL_GPL(virtio_finalize_features);
static int virtio_dev_probe(struct device *_d)
{
@ -239,17 +237,6 @@ static int virtio_dev_probe(struct device *_d)
driver_features_legacy = driver_features;
}
/*
* Some devices detect legacy solely via F_VERSION_1. Write
* F_VERSION_1 to force LE config space accesses before FEATURES_OK for
* these when needed.
*/
if (drv->validate && !virtio_legacy_is_little_endian()
&& device_features & BIT_ULL(VIRTIO_F_VERSION_1)) {
dev->features = BIT_ULL(VIRTIO_F_VERSION_1);
dev->config->finalize_features(dev);
}
if (device_features & (1ULL << VIRTIO_F_VERSION_1))
dev->features = driver_features & device_features;
else
@ -260,13 +247,26 @@ static int virtio_dev_probe(struct device *_d)
if (device_features & (1ULL << i))
__virtio_set_bit(dev, i);
err = dev->config->finalize_features(dev);
if (err)
goto err;
if (drv->validate) {
u64 features = dev->features;
err = drv->validate(dev);
if (err)
goto err;
/* Did validation change any features? Then write them again. */
if (features != dev->features) {
err = dev->config->finalize_features(dev);
if (err)
goto err;
}
}
err = virtio_finalize_features(dev);
err = virtio_features_ok(dev);
if (err)
goto err;
@ -490,7 +490,11 @@ int virtio_device_restore(struct virtio_device *dev)
/* We have a driver! */
virtio_add_status(dev, VIRTIO_CONFIG_S_DRIVER);
ret = virtio_finalize_features(dev);
ret = dev->config->finalize_features(dev);
if (ret)
goto err;
ret = virtio_features_ok(dev);
if (ret)
goto err;

View File

@ -1491,7 +1491,6 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
container_of(work, struct btrfs_fs_info, reclaim_bgs_work);
struct btrfs_block_group *bg;
struct btrfs_space_info *space_info;
LIST_HEAD(again_list);
if (!test_bit(BTRFS_FS_OPEN, &fs_info->flags))
return;
@ -1562,18 +1561,14 @@ void btrfs_reclaim_bgs_work(struct work_struct *work)
div64_u64(zone_unusable * 100, bg->length));
trace_btrfs_reclaim_block_group(bg);
ret = btrfs_relocate_chunk(fs_info, bg->start);
if (ret && ret != -EAGAIN)
if (ret)
btrfs_err(fs_info, "error relocating chunk %llu",
bg->start);
next:
btrfs_put_block_group(bg);
spin_lock(&fs_info->unused_bgs_lock);
if (ret == -EAGAIN && list_empty(&bg->bg_list))
list_add_tail(&bg->bg_list, &again_list);
else
btrfs_put_block_group(bg);
}
list_splice_tail(&again_list, &fs_info->reclaim_bgs);
spin_unlock(&fs_info->unused_bgs_lock);
mutex_unlock(&fs_info->reclaim_bgs_lock);
btrfs_exclop_finish(fs_info);

View File

@ -1566,32 +1566,13 @@ static struct extent_buffer *btrfs_search_slot_get_root(struct btrfs_root *root,
struct btrfs_path *p,
int write_lock_level)
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct extent_buffer *b;
int root_lock = 0;
int level = 0;
if (p->search_commit_root) {
/*
* The commit roots are read only so we always do read locks,
* and we always must hold the commit_root_sem when doing
* searches on them, the only exception is send where we don't
* want to block transaction commits for a long time, so
* we need to clone the commit root in order to avoid races
* with transaction commits that create a snapshot of one of
* the roots used by a send operation.
*/
if (p->need_commit_sem) {
down_read(&fs_info->commit_root_sem);
b = btrfs_clone_extent_buffer(root->commit_root);
up_read(&fs_info->commit_root_sem);
if (!b)
return ERR_PTR(-ENOMEM);
} else {
b = root->commit_root;
atomic_inc(&b->refs);
}
b = root->commit_root;
atomic_inc(&b->refs);
level = btrfs_header_level(b);
/*
* Ensure that all callers have set skip_locking when
@ -1657,6 +1638,42 @@ static struct extent_buffer *btrfs_search_slot_get_root(struct btrfs_root *root,
return b;
}
/*
* Replace the extent buffer at the lowest level of the path with a cloned
* version. The purpose is to be able to use it safely, after releasing the
* commit root semaphore, even if relocation is happening in parallel, the
* transaction used for relocation is committed and the extent buffer is
* reallocated in the next transaction.
*
* This is used in a context where the caller does not prevent transaction
* commits from happening, either by holding a transaction handle or holding
* some lock, while it's doing searches through a commit root.
* At the moment it's only used for send operations.
*/
static int finish_need_commit_sem_search(struct btrfs_path *path)
{
	const int i = path->lowest_level;
	const int slot = path->slots[i];
	struct extent_buffer *lowest = path->nodes[i];
	struct extent_buffer *clone;

	/* Only valid for searches done with path->need_commit_sem set. */
	ASSERT(path->need_commit_sem);

	/* Nothing at the lowest level, so there is nothing to clone. */
	if (!lowest)
		return 0;

	/* The clone must be taken while the commit root semaphore is held. */
	lockdep_assert_held_read(&lowest->fs_info->commit_root_sem);

	clone = btrfs_clone_extent_buffer(lowest);
	if (!clone)
		return -ENOMEM;

	/*
	 * Release everything the path holds, then install the clone at the
	 * same level and slot so the caller keeps the same logical position
	 * but now on a private copy of the extent buffer.
	 */
	btrfs_release_path(path);
	path->nodes[i] = clone;
	path->slots[i] = slot;

	return 0;
}
/*
* btrfs_search_slot - look for a key in a tree and perform necessary
@ -1693,6 +1710,7 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root,
const struct btrfs_key *key, struct btrfs_path *p,
int ins_len, int cow)
{
struct btrfs_fs_info *fs_info = root->fs_info;
struct extent_buffer *b;
int slot;
int ret;
@ -1734,6 +1752,11 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root,
min_write_lock_level = write_lock_level;
if (p->need_commit_sem) {
ASSERT(p->search_commit_root);
down_read(&fs_info->commit_root_sem);
}
again:
prev_cmp = -1;
b = btrfs_search_slot_get_root(root, p, write_lock_level);
@ -1928,6 +1951,16 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root *root,
done:
if (ret < 0 && !p->skip_release_on_error)
btrfs_release_path(p);
if (p->need_commit_sem) {
int ret2;
ret2 = finish_need_commit_sem_search(p);
up_read(&fs_info->commit_root_sem);
if (ret2)
ret = ret2;
}
return ret;
}
ALLOW_ERROR_INJECTION(btrfs_search_slot, ERRNO);
@ -4396,7 +4429,9 @@ int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path,
int level;
struct extent_buffer *c;
struct extent_buffer *next;
struct btrfs_fs_info *fs_info = root->fs_info;
struct btrfs_key key;
bool need_commit_sem = false;
u32 nritems;
int ret;
int i;
@ -4413,14 +4448,20 @@ int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path,
path->keep_locks = 1;
if (time_seq)
if (time_seq) {
ret = btrfs_search_old_slot(root, &key, path, time_seq);
else
} else {
if (path->need_commit_sem) {
path->need_commit_sem = 0;
need_commit_sem = true;
down_read(&fs_info->commit_root_sem);
}
ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
}
path->keep_locks = 0;
if (ret < 0)
return ret;
goto done;
nritems = btrfs_header_nritems(path->nodes[0]);
/*
@ -4543,6 +4584,15 @@ int btrfs_next_old_leaf(struct btrfs_root *root, struct btrfs_path *path,
ret = 0;
done:
unlock_up(path, 0, 1, 0, NULL);
if (need_commit_sem) {
int ret2;
path->need_commit_sem = 1;
ret2 = finish_need_commit_sem_search(path);
up_read(&fs_info->commit_root_sem);
if (ret2)
ret = ret2;
}
return ret;
}

View File

@ -568,7 +568,6 @@ enum {
/*
* Indicate that relocation of a chunk has started, it's set per chunk
* and is toggled between chunks.
* Set, tested and cleared while holding fs_info::send_reloc_lock.
*/
BTRFS_FS_RELOC_RUNNING,
@ -668,6 +667,12 @@ struct btrfs_fs_info {
u64 generation;
u64 last_trans_committed;
/*
* Generation of the last transaction used for block group relocation
* since the filesystem was last mounted (or 0 if none happened yet).
* Must be written and read while holding btrfs_fs_info::commit_root_sem.
*/
u64 last_reloc_trans;
u64 avg_delayed_ref_runtime;
/*
@ -997,13 +1002,6 @@ struct btrfs_fs_info {
struct crypto_shash *csum_shash;
spinlock_t send_reloc_lock;
/*
* Number of send operations in progress.
* Updated while holding fs_info::send_reloc_lock.
*/
int send_in_progress;
/* Type of exclusive operation running, protected by super_lock */
enum btrfs_exclusive_operation exclusive_operation;

View File

@ -2859,6 +2859,7 @@ static int __cold init_tree_roots(struct btrfs_fs_info *fs_info)
/* All successful */
fs_info->generation = generation;
fs_info->last_trans_committed = generation;
fs_info->last_reloc_trans = 0;
/* Always begin writing backup roots after the one being used */
if (backup_index < 0) {
@ -2992,9 +2993,6 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
spin_lock_init(&fs_info->swapfile_pins_lock);
fs_info->swapfile_pins = RB_ROOT;
spin_lock_init(&fs_info->send_reloc_lock);
fs_info->send_in_progress = 0;
fs_info->bg_reclaim_threshold = BTRFS_DEFAULT_RECLAIM_THRESH;
INIT_WORK(&fs_info->reclaim_bgs_work, btrfs_reclaim_bgs_work);
}

View File

@ -3854,25 +3854,14 @@ struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info,
* 0 success
* -EINPROGRESS operation is already in progress, that's probably a bug
* -ECANCELED cancellation request was set before the operation started
* -EAGAIN can not start because there are ongoing send operations
*/
static int reloc_chunk_start(struct btrfs_fs_info *fs_info)
{
spin_lock(&fs_info->send_reloc_lock);
if (fs_info->send_in_progress) {
btrfs_warn_rl(fs_info,
"cannot run relocation while send operations are in progress (%d in progress)",
fs_info->send_in_progress);
spin_unlock(&fs_info->send_reloc_lock);
return -EAGAIN;
}
if (test_and_set_bit(BTRFS_FS_RELOC_RUNNING, &fs_info->flags)) {
/* This should not happen */
spin_unlock(&fs_info->send_reloc_lock);
btrfs_err(fs_info, "reloc already running, cannot start");
return -EINPROGRESS;
}
spin_unlock(&fs_info->send_reloc_lock);
if (atomic_read(&fs_info->reloc_cancel_req) > 0) {
btrfs_info(fs_info, "chunk relocation canceled on start");
@ -3894,9 +3883,7 @@ static void reloc_chunk_end(struct btrfs_fs_info *fs_info)
/* Requested after start, clear bit first so any waiters can continue */
if (atomic_read(&fs_info->reloc_cancel_req) > 0)
btrfs_info(fs_info, "chunk relocation canceled during operation");
spin_lock(&fs_info->send_reloc_lock);
clear_and_wake_up_bit(BTRFS_FS_RELOC_RUNNING, &fs_info->flags);
spin_unlock(&fs_info->send_reloc_lock);
atomic_set(&fs_info->reloc_cancel_req, 0);
}

View File

@ -24,6 +24,7 @@
#include "transaction.h"
#include "compression.h"
#include "xattr.h"
#include "print-tree.h"
/*
* Maximum number of references an extent can have in order for us to attempt to
@ -95,6 +96,15 @@ struct send_ctx {
struct btrfs_path *right_path;
struct btrfs_key *cmp_key;
/*
* Keep track of the generation of the last transaction that was used
* for relocating a block group. This is periodically checked in order
* to detect if a relocation happened since the last check, so that we
* don't operate on stale extent buffers for nodes (level >= 1) or on
* stale disk_bytenr values of file extent items.
*/
u64 last_reloc_trans;
/*
* infos of the currently processed inode. In case of deleted inodes,
* these are the values from the deleted inode.
@ -1415,6 +1425,26 @@ static int find_extent_clone(struct send_ctx *sctx,
if (ret < 0)
goto out;
down_read(&fs_info->commit_root_sem);
if (fs_info->last_reloc_trans > sctx->last_reloc_trans) {
/*
* A transaction commit for a transaction in which block group
* relocation was done just happened.
* The disk_bytenr of the file extent item we processed is
* possibly stale, referring to the extent's location before
* relocation. So act as if we haven't found any clone sources
* and fallback to write commands, which will read the correct
* data from the new extent location. Otherwise we will fail
* below because we haven't found our own back reference or we
* could be getting incorrect sources in case the old extent
* was already reallocated after the relocation.
*/
up_read(&fs_info->commit_root_sem);
ret = -ENOENT;
goto out;
}
up_read(&fs_info->commit_root_sem);
if (!backref_ctx.found_itself) {
/* found a bug in backref code? */
ret = -EIO;
@ -6596,6 +6626,50 @@ static int changed_cb(struct btrfs_path *left_path,
{
int ret = 0;
/*
* We can not hold the commit root semaphore here. This is because in
* the case of sending and receiving to the same filesystem, using a
* pipe, could result in a deadlock:
*
* 1) The task running send blocks on the pipe because it's full;
*
* 2) The task running receive, which is the only consumer of the pipe,
* is waiting for a transaction commit (for example due to a space
* reservation when doing a write or triggering a transaction commit
* when creating a subvolume);
*
* 3) The transaction is waiting to write lock the commit root semaphore,
* but can not acquire it since it's being held at 1).
*
* Down this call chain we write to the pipe through kernel_write().
* The same type of problem can also happen when sending to a file that
* is stored in the same filesystem - when reserving space for a write
* into the file, we can trigger a transaction commit.
*
* Our caller has supplied us with clones of leaves from the send and
* parent roots, so we're safe here from a concurrent relocation and
* further reallocation of metadata extents while we are here. Below we
* also assert that the leaves are clones.
*/
lockdep_assert_not_held(&sctx->send_root->fs_info->commit_root_sem);
/*
* We always have a send root, so left_path is never NULL. We will not
* have a leaf when we have reached the end of the send root but have
* not yet reached the end of the parent root.
*/
if (left_path->nodes[0])
ASSERT(test_bit(EXTENT_BUFFER_UNMAPPED,
&left_path->nodes[0]->bflags));
/*
* When doing a full send we don't have a parent root, so right_path is
* NULL. When doing an incremental send, we may have reached the end of
* the parent root already, so we don't have a leaf at right_path.
*/
if (right_path && right_path->nodes[0])
ASSERT(test_bit(EXTENT_BUFFER_UNMAPPED,
&right_path->nodes[0]->bflags));
if (result == BTRFS_COMPARE_TREE_SAME) {
if (key->type == BTRFS_INODE_REF_KEY ||
key->type == BTRFS_INODE_EXTREF_KEY) {
@ -6642,14 +6716,46 @@ static int changed_cb(struct btrfs_path *left_path,
return ret;
}
static int search_key_again(const struct send_ctx *sctx,
			    struct btrfs_root *root,
			    struct btrfs_path *path,
			    const struct btrfs_key *key)
{
	int ret;

	/*
	 * Unless the path itself requests commit semaphore handling
	 * (need_commit_sem), the caller must already hold the commit root
	 * semaphore for reading while we redo the search.
	 */
	if (!path->need_commit_sem)
		lockdep_assert_held_read(&root->fs_info->commit_root_sem);

	/*
	 * Roots used for send operations are readonly and no one can add,
	 * update or remove keys from them, so we should be able to find our
	 * key again. The only exception is deduplication, which can operate on
	 * readonly roots and add, update or remove keys to/from them - but at
	 * the moment we don't allow it to run in parallel with send.
	 */
	ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
	ASSERT(ret <= 0);
	if (ret > 0) {
		/*
		 * The key disappeared from a readonly root: that should be
		 * impossible (see comment above), so dump the tree node and
		 * report filesystem corruption.
		 */
		btrfs_print_tree(path->nodes[path->lowest_level], false);
		btrfs_err(root->fs_info,
"send: key (%llu %u %llu) not found in %s root %llu, lowest_level %d, slot %d",
			  key->objectid, key->type, key->offset,
			  (root == sctx->parent_root ? "parent" : "send"),
			  root->root_key.objectid, path->lowest_level,
			  path->slots[path->lowest_level]);
		return -EUCLEAN;
	}

	return ret;
}
static int full_send_tree(struct send_ctx *sctx)
{
int ret;
struct btrfs_root *send_root = sctx->send_root;
struct btrfs_key key;
struct btrfs_fs_info *fs_info = send_root->fs_info;
struct btrfs_path *path;
struct extent_buffer *eb;
int slot;
path = alloc_path_for_send();
if (!path)
@ -6660,6 +6766,10 @@ static int full_send_tree(struct send_ctx *sctx)
key.type = BTRFS_INODE_ITEM_KEY;
key.offset = 0;
down_read(&fs_info->commit_root_sem);
sctx->last_reloc_trans = fs_info->last_reloc_trans;
up_read(&fs_info->commit_root_sem);
ret = btrfs_search_slot_for_read(send_root, &key, path, 1, 0);
if (ret < 0)
goto out;
@ -6667,15 +6777,35 @@ static int full_send_tree(struct send_ctx *sctx)
goto out_finish;
while (1) {
eb = path->nodes[0];
slot = path->slots[0];
btrfs_item_key_to_cpu(eb, &key, slot);
btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
ret = changed_cb(path, NULL, &key,
BTRFS_COMPARE_TREE_NEW, sctx);
if (ret < 0)
goto out;
down_read(&fs_info->commit_root_sem);
if (fs_info->last_reloc_trans > sctx->last_reloc_trans) {
sctx->last_reloc_trans = fs_info->last_reloc_trans;
up_read(&fs_info->commit_root_sem);
/*
* A transaction used for relocating a block group was
* committed or is about to finish its commit. Release
* our path (leaf) and restart the search, so that we
* avoid operating on any file extent items that are
* stale, with a disk_bytenr that reflects a pre
* relocation value. This way we avoid as much as
* possible to fallback to regular writes when checking
* if we can clone file ranges.
*/
btrfs_release_path(path);
ret = search_key_again(sctx, send_root, path, &key);
if (ret < 0)
goto out;
} else {
up_read(&fs_info->commit_root_sem);
}
ret = btrfs_next_item(send_root, path);
if (ret < 0)
goto out;
@ -6693,6 +6823,20 @@ static int full_send_tree(struct send_ctx *sctx)
return ret;
}
/*
 * Swap the extent buffer at the given level of the path for a private clone.
 * Returns 0 on success or -ENOMEM if the clone could not be allocated (the
 * path is left untouched in that case).
 */
static int replace_node_with_clone(struct btrfs_path *path, int level)
{
	struct extent_buffer *cloned_eb = btrfs_clone_extent_buffer(path->nodes[level]);

	if (!cloned_eb)
		return -ENOMEM;

	/* Drop our reference on the original node and use the clone instead. */
	free_extent_buffer(path->nodes[level]);
	path->nodes[level] = cloned_eb;

	return 0;
}
static int tree_move_down(struct btrfs_path *path, int *level, u64 reada_min_gen)
{
struct extent_buffer *eb;
@ -6702,6 +6846,8 @@ static int tree_move_down(struct btrfs_path *path, int *level, u64 reada_min_gen
u64 reada_max;
u64 reada_done = 0;
lockdep_assert_held_read(&parent->fs_info->commit_root_sem);
BUG_ON(*level == 0);
eb = btrfs_read_node_slot(parent, slot);
if (IS_ERR(eb))
@ -6725,6 +6871,10 @@ static int tree_move_down(struct btrfs_path *path, int *level, u64 reada_min_gen
path->nodes[*level - 1] = eb;
path->slots[*level - 1] = 0;
(*level)--;
if (*level == 0)
return replace_node_with_clone(path, 0);
return 0;
}
@ -6738,8 +6888,10 @@ static int tree_move_next_or_upnext(struct btrfs_path *path,
path->slots[*level]++;
while (path->slots[*level] >= nritems) {
if (*level == root_level)
if (*level == root_level) {
path->slots[*level] = nritems - 1;
return -1;
}
/* move upnext */
path->slots[*level] = 0;
@ -6771,14 +6923,20 @@ static int tree_advance(struct btrfs_path *path,
} else {
ret = tree_move_down(path, level, reada_min_gen);
}
if (ret >= 0) {
if (*level == 0)
btrfs_item_key_to_cpu(path->nodes[*level], key,
path->slots[*level]);
else
btrfs_node_key_to_cpu(path->nodes[*level], key,
path->slots[*level]);
}
/*
* Even if we have reached the end of a tree, ret is -1, update the key
* anyway, so that in case we need to restart due to a block group
* relocation, we can assert that the last key of the root node still
* exists in the tree.
*/
if (*level == 0)
btrfs_item_key_to_cpu(path->nodes[*level], key,
path->slots[*level]);
else
btrfs_node_key_to_cpu(path->nodes[*level], key,
path->slots[*level]);
return ret;
}
@ -6807,6 +6965,97 @@ static int tree_compare_item(struct btrfs_path *left_path,
return 0;
}
/*
* A transaction used for relocating a block group was committed or is about to
* finish its commit. Release our paths and restart the search, so that we are
* not using stale extent buffers:
*
* 1) For levels > 0, we are only holding references of extent buffers, without
* any locks on them, which does not prevent them from having been relocated
* and reallocated after the last time we released the commit root semaphore.
* The exception are the root nodes, for which we always have a clone, see
* the comment at btrfs_compare_trees();
*
* 2) For leaves, level 0, we are holding copies (clones) of extent buffers, so
* we are safe from the concurrent relocation and reallocation. However they
* can have file extent items with a pre relocation disk_bytenr value, so we
* restart the start from the current commit roots and clone the new leaves so
* that we get the post relocation disk_bytenr values. Not doing so, could
* make us clone the wrong data in case there are new extents using the old
* disk_bytenr that happen to be shared.
*/
static int restart_after_relocation(struct btrfs_path *left_path,
				    struct btrfs_path *right_path,
				    const struct btrfs_key *left_key,
				    const struct btrfs_key *right_key,
				    int left_level,
				    int right_level,
				    const struct send_ctx *sctx)
{
	int root_level;
	int ret;

	/* The caller must hold the commit root semaphore for reading. */
	lockdep_assert_held_read(&sctx->send_root->fs_info->commit_root_sem);

	/* Drop both paths before redoing the searches from scratch. */
	btrfs_release_path(left_path);
	btrfs_release_path(right_path);

	/*
	 * Since keys can not be added or removed to/from our roots because they
	 * are readonly and we do not allow deduplication to run in parallel
	 * (which can add, remove or change keys), the layout of the trees should
	 * not change.
	 */
	left_path->lowest_level = left_level;
	ret = search_key_again(sctx, sctx->send_root, left_path, left_key);
	if (ret < 0)
		return ret;

	right_path->lowest_level = right_level;
	ret = search_key_again(sctx, sctx->parent_root, right_path, right_key);
	if (ret < 0)
		return ret;

	/*
	 * If the lowest level nodes are leaves, clone them so that they can be
	 * safely used by changed_cb() while not under the protection of the
	 * commit root semaphore, even if relocation and reallocation happens in
	 * parallel.
	 */
	if (left_level == 0) {
		ret = replace_node_with_clone(left_path, 0);
		if (ret < 0)
			return ret;
	}

	if (right_level == 0) {
		ret = replace_node_with_clone(right_path, 0);
		if (ret < 0)
			return ret;
	}

	/*
	 * Now clone the root nodes (unless they happen to be the leaves we have
	 * already cloned). This is to protect against concurrent snapshotting of
	 * the send and parent roots (see the comment at btrfs_compare_trees()).
	 */
	root_level = btrfs_header_level(sctx->send_root->commit_root);
	if (root_level > 0) {
		ret = replace_node_with_clone(left_path, root_level);
		if (ret < 0)
			return ret;
	}

	root_level = btrfs_header_level(sctx->parent_root->commit_root);
	if (root_level > 0) {
		ret = replace_node_with_clone(right_path, root_level);
		if (ret < 0)
			return ret;
	}

	return 0;
}
/*
* This function compares two trees and calls the provided callback for
* every changed/new/deleted item it finds.
@ -6835,10 +7084,10 @@ static int btrfs_compare_trees(struct btrfs_root *left_root,
int right_root_level;
int left_level;
int right_level;
int left_end_reached;
int right_end_reached;
int advance_left;
int advance_right;
int left_end_reached = 0;
int right_end_reached = 0;
int advance_left = 0;
int advance_right = 0;
u64 left_blockptr;
u64 right_blockptr;
u64 left_gen;
@ -6906,12 +7155,18 @@ static int btrfs_compare_trees(struct btrfs_root *left_root,
down_read(&fs_info->commit_root_sem);
left_level = btrfs_header_level(left_root->commit_root);
left_root_level = left_level;
/*
* We clone the root node of the send and parent roots to prevent races
* with snapshot creation of these roots. Snapshot creation COWs the
* root node of a tree, so after the transaction is committed the old
* extent can be reallocated while this send operation is still ongoing.
* So we clone them, under the commit root semaphore, to be race free.
*/
left_path->nodes[left_level] =
btrfs_clone_extent_buffer(left_root->commit_root);
if (!left_path->nodes[left_level]) {
up_read(&fs_info->commit_root_sem);
ret = -ENOMEM;
goto out;
goto out_unlock;
}
right_level = btrfs_header_level(right_root->commit_root);
@ -6919,9 +7174,8 @@ static int btrfs_compare_trees(struct btrfs_root *left_root,
right_path->nodes[right_level] =
btrfs_clone_extent_buffer(right_root->commit_root);
if (!right_path->nodes[right_level]) {
up_read(&fs_info->commit_root_sem);
ret = -ENOMEM;
goto out;
goto out_unlock;
}
/*
* Our right root is the parent root, while the left root is the "send"
@ -6931,7 +7185,6 @@ static int btrfs_compare_trees(struct btrfs_root *left_root,
* will need to read them at some point.
*/
reada_min_gen = btrfs_header_generation(right_root->commit_root);
up_read(&fs_info->commit_root_sem);
if (left_level == 0)
btrfs_item_key_to_cpu(left_path->nodes[left_level],
@ -6946,11 +7199,26 @@ static int btrfs_compare_trees(struct btrfs_root *left_root,
btrfs_node_key_to_cpu(right_path->nodes[right_level],
&right_key, right_path->slots[right_level]);
left_end_reached = right_end_reached = 0;
advance_left = advance_right = 0;
sctx->last_reloc_trans = fs_info->last_reloc_trans;
while (1) {
cond_resched();
if (need_resched() ||
rwsem_is_contended(&fs_info->commit_root_sem)) {
up_read(&fs_info->commit_root_sem);
cond_resched();
down_read(&fs_info->commit_root_sem);
}
if (fs_info->last_reloc_trans > sctx->last_reloc_trans) {
ret = restart_after_relocation(left_path, right_path,
&left_key, &right_key,
left_level, right_level,
sctx);
if (ret < 0)
goto out_unlock;
sctx->last_reloc_trans = fs_info->last_reloc_trans;
}
if (advance_left && !left_end_reached) {
ret = tree_advance(left_path, &left_level,
left_root_level,
@ -6959,7 +7227,7 @@ static int btrfs_compare_trees(struct btrfs_root *left_root,
if (ret == -1)
left_end_reached = ADVANCE;
else if (ret < 0)
goto out;
goto out_unlock;
advance_left = 0;
}
if (advance_right && !right_end_reached) {
@ -6970,54 +7238,55 @@ static int btrfs_compare_trees(struct btrfs_root *left_root,
if (ret == -1)
right_end_reached = ADVANCE;
else if (ret < 0)
goto out;
goto out_unlock;
advance_right = 0;
}
if (left_end_reached && right_end_reached) {
ret = 0;
goto out;
goto out_unlock;
} else if (left_end_reached) {
if (right_level == 0) {
up_read(&fs_info->commit_root_sem);
ret = changed_cb(left_path, right_path,
&right_key,
BTRFS_COMPARE_TREE_DELETED,
sctx);
if (ret < 0)
goto out;
down_read(&fs_info->commit_root_sem);
}
advance_right = ADVANCE;
continue;
} else if (right_end_reached) {
if (left_level == 0) {
up_read(&fs_info->commit_root_sem);
ret = changed_cb(left_path, right_path,
&left_key,
BTRFS_COMPARE_TREE_NEW,
sctx);
if (ret < 0)
goto out;
down_read(&fs_info->commit_root_sem);
}
advance_left = ADVANCE;
continue;
}
if (left_level == 0 && right_level == 0) {
up_read(&fs_info->commit_root_sem);
cmp = btrfs_comp_cpu_keys(&left_key, &right_key);
if (cmp < 0) {
ret = changed_cb(left_path, right_path,
&left_key,
BTRFS_COMPARE_TREE_NEW,
sctx);
if (ret < 0)
goto out;
advance_left = ADVANCE;
} else if (cmp > 0) {
ret = changed_cb(left_path, right_path,
&right_key,
BTRFS_COMPARE_TREE_DELETED,
sctx);
if (ret < 0)
goto out;
advance_right = ADVANCE;
} else {
enum btrfs_compare_tree_result result;
@ -7031,11 +7300,13 @@ static int btrfs_compare_trees(struct btrfs_root *left_root,
result = BTRFS_COMPARE_TREE_SAME;
ret = changed_cb(left_path, right_path,
&left_key, result, sctx);
if (ret < 0)
goto out;
advance_left = ADVANCE;
advance_right = ADVANCE;
}
if (ret < 0)
goto out;
down_read(&fs_info->commit_root_sem);
} else if (left_level == right_level) {
cmp = btrfs_comp_cpu_keys(&left_key, &right_key);
if (cmp < 0) {
@ -7075,6 +7346,8 @@ static int btrfs_compare_trees(struct btrfs_root *left_root,
}
}
out_unlock:
up_read(&fs_info->commit_root_sem);
out:
btrfs_free_path(left_path);
btrfs_free_path(right_path);
@ -7413,21 +7686,7 @@ long btrfs_ioctl_send(struct file *mnt_file, struct btrfs_ioctl_send_args *arg)
if (ret)
goto out;
spin_lock(&fs_info->send_reloc_lock);
if (test_bit(BTRFS_FS_RELOC_RUNNING, &fs_info->flags)) {
spin_unlock(&fs_info->send_reloc_lock);
btrfs_warn_rl(fs_info,
"cannot run send because a relocation operation is in progress");
ret = -EAGAIN;
goto out;
}
fs_info->send_in_progress++;
spin_unlock(&fs_info->send_reloc_lock);
ret = send_subvol(sctx);
spin_lock(&fs_info->send_reloc_lock);
fs_info->send_in_progress--;
spin_unlock(&fs_info->send_reloc_lock);
if (ret < 0)
goto out;

View File

@ -163,6 +163,10 @@ static noinline void switch_commit_roots(struct btrfs_trans_handle *trans)
struct btrfs_caching_control *caching_ctl, *next;
down_write(&fs_info->commit_root_sem);
if (test_bit(BTRFS_FS_RELOC_RUNNING, &fs_info->flags))
fs_info->last_reloc_trans = trans->transid;
list_for_each_entry_safe(root, tmp, &cur_trans->switch_commits,
dirty_list) {
list_del_init(&root->dirty_list);

View File

@ -941,7 +941,17 @@ static int fuse_copy_page(struct fuse_copy_state *cs, struct page **pagep,
while (count) {
if (cs->write && cs->pipebufs && page) {
return fuse_ref_page(cs, page, offset, count);
/*
* Can't control lifetime of pipe buffers, so always
* copy user pages.
*/
if (cs->req->args->user_pages) {
err = fuse_copy_fill(cs);
if (err)
return err;
} else {
return fuse_ref_page(cs, page, offset, count);
}
} else if (!cs->len) {
if (cs->move_pages && page &&
offset == 0 && count == PAGE_SIZE) {

View File

@ -1417,6 +1417,7 @@ static int fuse_get_user_pages(struct fuse_args_pages *ap, struct iov_iter *ii,
(PAGE_SIZE - ret) & (PAGE_SIZE - 1);
}
ap->args.user_pages = true;
if (write)
ap->args.in_pages = true;
else

View File

@ -256,6 +256,7 @@ struct fuse_args {
bool nocreds:1;
bool in_pages:1;
bool out_pages:1;
bool user_pages:1;
bool out_argvar:1;
bool page_zeroing:1;
bool page_replace:1;

View File

@ -394,9 +394,12 @@ static int fuse_priv_ioctl(struct inode *inode, struct fuse_file *ff,
args.out_args[1].value = ptr;
err = fuse_simple_request(fm, &args);
if (!err && outarg.flags & FUSE_IOCTL_RETRY)
err = -EIO;
if (!err) {
if (outarg.result < 0)
err = outarg.result;
else if (outarg.flags & FUSE_IOCTL_RETRY)
err = -EIO;
}
return err;
}

View File

@ -252,7 +252,8 @@ pipe_read(struct kiocb *iocb, struct iov_iter *to)
*/
was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage);
for (;;) {
unsigned int head = pipe->head;
/* Read ->head with a barrier vs post_one_notification() */
unsigned int head = smp_load_acquire(&pipe->head);
unsigned int tail = pipe->tail;
unsigned int mask = pipe->ring_size - 1;
@ -830,10 +831,8 @@ void free_pipe_info(struct pipe_inode_info *pipe)
int i;
#ifdef CONFIG_WATCH_QUEUE
if (pipe->watch_queue) {
if (pipe->watch_queue)
watch_queue_clear(pipe->watch_queue);
put_watch_queue(pipe->watch_queue);
}
#endif
(void) account_pipe_buffers(pipe->user, pipe->nr_accounted, 0);
@ -843,6 +842,10 @@ void free_pipe_info(struct pipe_inode_info *pipe)
if (buf->ops)
pipe_buf_release(pipe, buf);
}
#ifdef CONFIG_WATCH_QUEUE
if (pipe->watch_queue)
put_watch_queue(pipe->watch_queue);
#endif
if (pipe->tmp_page)
__free_page(pipe->tmp_page);
kfree(pipe->bufs);

View File

@ -62,8 +62,9 @@ struct virtio_shm_region {
* Returns the first 64 feature bits (all we currently need).
* @finalize_features: confirm what device features we'll be using.
* vdev: the virtio_device
* This gives the final feature bits for the device: it can change
* This sends the driver feature bits to the device: it can change
* the dev->feature bits if it wants.
* Note: despite the name this can be called any number of times.
* Returns 0 on success or error status
* @bus_name: return the bus name associated with the device (optional)
* vdev: the virtio_device

View File

@ -28,7 +28,8 @@ struct watch_type_filter {
struct watch_filter {
union {
struct rcu_head rcu;
unsigned long type_filter[2]; /* Bitmask of accepted types */
/* Bitmask of accepted types */
DECLARE_BITMAP(type_filter, WATCH_TYPE__NR);
};
u32 nr_filters; /* Number of filters */
struct watch_type_filter filters[];

View File

@ -1056,7 +1056,6 @@ void dsa_unregister_switch(struct dsa_switch *ds);
int dsa_register_switch(struct dsa_switch *ds);
void dsa_switch_shutdown(struct dsa_switch *ds);
struct dsa_switch *dsa_switch_find(int tree_index, int sw_index);
void dsa_flush_workqueue(void);
#ifdef CONFIG_PM_SLEEP
int dsa_switch_suspend(struct dsa_switch *ds);
int dsa_switch_resume(struct dsa_switch *ds);

View File

@ -4,6 +4,8 @@
#include <linux/skbuff.h>
#define ESP_SKB_FRAG_MAXSIZE (PAGE_SIZE << SKB_FRAG_PAGE_ORDER)
struct ip_esp_hdr;
static inline struct ip_esp_hdr *ip_esp_hdr(const struct sk_buff *skb)

View File

@ -1679,14 +1679,15 @@ int km_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
const struct xfrm_migrate *m, int num_bundles,
const struct xfrm_kmaddress *k,
const struct xfrm_encap_tmpl *encap);
struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *net);
struct xfrm_state *xfrm_migrate_state_find(struct xfrm_migrate *m, struct net *net,
u32 if_id);
struct xfrm_state *xfrm_state_migrate(struct xfrm_state *x,
struct xfrm_migrate *m,
struct xfrm_encap_tmpl *encap);
int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
struct xfrm_migrate *m, int num_bundles,
struct xfrm_kmaddress *k, struct net *net,
struct xfrm_encap_tmpl *encap);
struct xfrm_encap_tmpl *encap, u32 if_id);
#endif
int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport);