3
0
mirror of https://github.com/Qortal/Brooklyn.git synced 2025-01-30 14:52:17 +00:00

T3Q sucks major dick cuz he is a fag

* Add new file system support
This commit is contained in:
Scare Crowe 2021-10-21 17:05:07 +05:00
parent bdc4d96db5
commit 465978962d
78 changed files with 37761 additions and 0 deletions

199
fs/aufs/Kconfig Normal file
View File

@ -0,0 +1,199 @@
# SPDX-License-Identifier: GPL-2.0
config AUFS_FS
tristate "Aufs (Advanced multi layered unification filesystem) support"
help
Aufs is a stackable unification filesystem such as Unionfs,
which unifies several directories and provides a merged single
directory.
In the early days, aufs was entirely re-designed and
re-implemented Unionfs Version 1.x series. Introducing many
original ideas, approaches and improvements, it becomes totally
different from Unionfs while keeping the basic features.
if AUFS_FS
choice
prompt "Maximum number of branches"
default AUFS_BRANCH_MAX_127
help
Specifies the maximum number of branches (or member directories)
in a single aufs. The larger value consumes more system
resources and has a minor impact to performance.
config AUFS_BRANCH_MAX_127
bool "127"
help
Specifies the maximum number of branches (or member directories)
in a single aufs. The larger value consumes more system
resources and has a minor impact to performance.
config AUFS_BRANCH_MAX_511
bool "511"
help
Specifies the maximum number of branches (or member directories)
in a single aufs. The larger value consumes more system
resources and has a minor impact to performance.
config AUFS_BRANCH_MAX_1023
bool "1023"
help
Specifies the maximum number of branches (or member directories)
in a single aufs. The larger value consumes more system
resources and has a minor impact to performance.
config AUFS_BRANCH_MAX_32767
bool "32767"
help
Specifies the maximum number of branches (or member directories)
in a single aufs. The larger value consumes more system
resources and has a minor impact to performance.
endchoice
config AUFS_SBILIST
bool
depends on AUFS_MAGIC_SYSRQ || PROC_FS
default y
help
Automatic configuration for internal use.
When aufs supports Magic SysRq or /proc, enabled automatically.
config AUFS_HNOTIFY
bool "Detect direct branch access (bypassing aufs)"
help
If you want to modify files on branches directly, eg. bypassing aufs,
and want aufs to detect the changes of them fully, then enable this
option and use 'udba=notify' mount option.
Currently there is only one available configuration, "fsnotify".
It will have a negative impact to the performance.
See detail in aufs.5.
choice
prompt "method" if AUFS_HNOTIFY
default AUFS_HFSNOTIFY
config AUFS_HFSNOTIFY
bool "fsnotify"
select FSNOTIFY
endchoice
config AUFS_EXPORT
bool "NFS-exportable aufs"
depends on EXPORTFS
help
If you want to export your mounted aufs via NFS, then enable this
option. There are several requirements for this configuration.
See detail in aufs.5.
config AUFS_INO_T_64
bool
depends on AUFS_EXPORT
depends on 64BIT && !(ALPHA || S390)
default y
help
Automatic configuration for internal use.
/* typedef unsigned long/int __kernel_ino_t */
/* alpha and s390x are int */
config AUFS_XATTR
bool "support for XATTR/EA (including Security Labels)"
help
If your branch fs supports XATTR/EA and you want to make them
available in aufs too, then enable this option and specify the
branch attributes for EA.
See detail in aufs.5.
config AUFS_FHSM
bool "File-based Hierarchical Storage Management"
help
Hierarchical Storage Management (or HSM) is a well-known feature
in the storage world. Aufs provides this feature as file-based
with multiple branches.
These multiple branches are prioritized, ie. the topmost one
should be the fastest drive and be used heavily.
config AUFS_RDU
bool "Readdir in userspace"
help
Aufs has two methods to provide a merged view for a directory,
by a user-space library and by kernel-space natively. The latter
is always enabled but sometimes large and slow.
If you enable this option, install the library in aufs2-util
package, and set some environment variables for your readdir(3),
then the work will be handled in user-space which generally
shows better performance in most cases.
See detail in aufs.5.
config AUFS_DIRREN
bool "Workaround for rename(2)-ing a directory"
help
By default, aufs returns an EXDEV error when renaming a dir which
has a child on a lower branch, since it is a bad idea to issue
rename(2) internally for every lower branch. But users may not
accept this behaviour. So here is a workaround to allow such
rename(2) and store some extra information on the writable
branch. Obviously this is costly (and I don't like it).
To use this feature, you need to enable this configuration AND
to specify the mount option `dirren.'
See details in aufs.5 and the design documents.
config AUFS_SHWH
bool "Show whiteouts"
help
If you want to make the whiteouts in aufs visible, then enable
this option and specify 'shwh' mount option. Although it may
sound like philosophy or something, technically it
simply shows the name of whiteout while keeping its behaviour.
config AUFS_BR_RAMFS
bool "Ramfs (initramfs/rootfs) as an aufs branch"
help
If you want to use ramfs as an aufs branch fs, then enable this
option. Generally tmpfs is recommended.
Aufs prohibits them from being a branch fs by default, because
initramfs generally becomes unusable after switch_root or
something similar. If you set initramfs as an aufs branch and boot
your system by switch_root, you will easily run into problems since
the files in initramfs may be inaccessible.
Unless you are going to use ramfs as an aufs branch fs without
switch_root or something, leave it N.
config AUFS_BR_FUSE
bool "Fuse fs as an aufs branch"
depends on FUSE_FS
select AUFS_POLL
help
If you want to use fuse-based userspace filesystem as an aufs
branch fs, then enable this option.
It implements the internal poll(2) operation which is
implemented by fuse only (currently).
config AUFS_POLL
bool
help
Automatic configuration for internal use.
config AUFS_BR_HFSPLUS
bool "Hfsplus as an aufs branch"
depends on HFSPLUS_FS
default y
help
If you want to use hfsplus fs as an aufs branch fs, then enable
this option. This option introduces a small overhead at
copying-up a file on hfsplus.
config AUFS_BDEV_LOOP
bool
depends on BLK_DEV_LOOP
default y
help
Automatic configuration for internal use.
Convert =[ym] into =y.
config AUFS_DEBUG
bool "Debug aufs"
help
Enable this to compile aufs internal debug code.
It will have a negative impact to the performance.
config AUFS_MAGIC_SYSRQ
bool
depends on AUFS_DEBUG && MAGIC_SYSRQ
default y
help
Automatic configuration for internal use.
When aufs supports Magic SysRq, enabled automatically.
endif

46
fs/aufs/Makefile Normal file
View File

@ -0,0 +1,46 @@
# SPDX-License-Identifier: GPL-2.0
# kbuild makefile for aufs: pulls in helper makefiles, sets compiler flags
# and lists the objects making up aufs.o.
include ${src}/magic.mk
# conf.mk is pulled in only when aufs is built as a module (CONFIG_AUFS_FS=m)
ifeq (${CONFIG_AUFS_FS},m)
include ${src}/conf.mk
endif
# optional: the leading '-' makes make carry on when priv_def.mk is absent
-include ${src}/priv_def.mk
# cf. include/linux/kernel.h
# enable pr_debug
# NOTE(review): DEBUG is defined unconditionally here, independent of
# CONFIG_AUFS_DEBUG.
ccflags-y += -DDEBUG
# sparse requires the full pathname
# aufs_type.h is force-included into every compilation; the path is taken
# relative to ${M} when M is defined and relative to ${srctree} otherwise.
ifdef M
ccflags-y += -include ${M}/../../include/uapi/linux/aufs_type.h
else
ccflags-y += -include ${srctree}/include/uapi/linux/aufs_type.h
endif
obj-$(CONFIG_AUFS_FS) += aufs.o
# objects which are always part of aufs.o
aufs-y := module.o sbinfo.o super.o branch.o xino.o sysaufs.o opts.o \
wkq.o vfsub.o dcsub.o \
cpup.o whout.o wbr_policy.o \
dinfo.o dentry.o \
dynop.o \
finfo.o file.o f_op.o \
dir.o vdir.o \
iinfo.o inode.o i_op.o i_op_add.o i_op_del.o i_op_ren.o \
mvdown.o ioctl.o
# all are boolean
# objects below are added only when the named option evaluates to 'y'
aufs-$(CONFIG_PROC_FS) += procfs.o plink.o
aufs-$(CONFIG_SYSFS) += sysfs.o
aufs-$(CONFIG_DEBUG_FS) += dbgaufs.o
aufs-$(CONFIG_AUFS_BDEV_LOOP) += loop.o
aufs-$(CONFIG_AUFS_HNOTIFY) += hnotify.o
aufs-$(CONFIG_AUFS_HFSNOTIFY) += hfsnotify.o
aufs-$(CONFIG_AUFS_EXPORT) += export.o
aufs-$(CONFIG_AUFS_XATTR) += xattr.o
aufs-$(CONFIG_FS_POSIX_ACL) += posix_acl.o
aufs-$(CONFIG_AUFS_DIRREN) += dirren.o
aufs-$(CONFIG_AUFS_FHSM) += fhsm.o
aufs-$(CONFIG_AUFS_POLL) += poll.o
aufs-$(CONFIG_AUFS_RDU) += rdu.o
aufs-$(CONFIG_AUFS_BR_HFSPLUS) += hfsplus.o
aufs-$(CONFIG_AUFS_DEBUG) += debug.o
aufs-$(CONFIG_AUFS_MAGIC_SYSRQ) += sysrq.o

62
fs/aufs/aufs.h Normal file
View File

@ -0,0 +1,62 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2005-2020 Junjiro R. Okajima
*
* This program, aufs is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* all header files
*/
#ifndef __AUFS_H__
#define __AUFS_H__
#ifdef __KERNEL__
/*
 * AuStub() expands to a static inline function called @name which returns
 * @type, takes the variadic arguments as its parameter list and has @body as
 * its only statement.  branch.h uses it for the no-op variants of optional
 * features.
 */
#define AuStub(type, name, body, ...) \
static inline type name(__VA_ARGS__) { body; }
/* stub with an empty body and no return value */
#define AuStubVoid(name, ...) \
AuStub(void, name, , __VA_ARGS__)
/* stub which only returns 0 */
#define AuStubInt0(name, ...) \
AuStub(int, name, return 0, __VA_ARGS__)
#include "debug.h"
#include "branch.h"
#include "cpup.h"
#include "dcsub.h"
#include "dbgaufs.h"
#include "dentry.h"
#include "dir.h"
#include "dirren.h"
#include "dynop.h"
#include "file.h"
#include "fstype.h"
#include "hbl.h"
#include "inode.h"
#include "lcnt.h"
#include "loop.h"
#include "module.h"
#include "opts.h"
#include "rwsem.h"
#include "super.h"
#include "sysaufs.h"
#include "vfsub.h"
#include "whout.h"
#include "wkq.h"
#endif /* __KERNEL__ */
#endif /* __AUFS_H__ */

1427
fs/aufs/branch.c Normal file

File diff suppressed because it is too large Load Diff

366
fs/aufs/branch.h Normal file
View File

@ -0,0 +1,366 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2005-2020 Junjiro R. Okajima
*
* This program, aufs is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* branch filesystems and xino for them
*/
#ifndef __AUFS_BRANCH_H__
#define __AUFS_BRANCH_H__
#ifdef __KERNEL__
#include <linux/mount.h>
#include "dirren.h"
#include "dynop.h"
#include "lcnt.h"
#include "rwsem.h"
#include "super.h"
/* ---------------------------------------------------------------------- */
/* a xino file */
struct au_xino {
/* array of xi_nfile file pointers; au_xino_file() bounds-checks against it */
struct file **xi_file;
unsigned int xi_nfile;
/* NOTE(review): bookkeeping for non-directory inodes, judging by the name */
struct {
spinlock_t spin;
ino_t *array;
int total;
/* reserved for future use */
/* unsigned long *bitmap; */
wait_queue_head_t wqh;
} xi_nondir;
struct mutex xi_mtx; /* protects xi_file array */
struct hlist_bl_head xi_writing;
atomic_t xi_truncating;
/* reference count: taken by au_xino_get(), read by au_xino_count() */
struct kref xi_kref;
};
/* File-based Hierarchical Storage Management */
/*
 * Per-branch FHSM state.  Every member is compiled out unless
 * CONFIG_AUFS_FHSM is set, which leaves an empty struct.
 */
struct au_br_fhsm {
#ifdef CONFIG_AUFS_FHSM
/* initialised by au_br_fhsm_init(), destroyed by au_br_fhsm_fin() */
struct mutex bf_lock;
/* bf_jiffy and bf_readable both start at 0, see au_br_fhsm_init() */
unsigned long bf_jiffy;
struct aufs_stfs bf_stfs;
int bf_readable;
#endif
};
/* members for writable branch only */
/* indices into au_wbr.wbr_wh[]; AuBrWh_Last is the array length */
enum {AuBrWh_BASE, AuBrWh_PLINK, AuBrWh_ORPH, AuBrWh_Last};
struct au_wbr {
/* taken through the wbr_wh_*lock() macros in this header */
struct au_rwsem wbr_wh_rwsem;
/* one dentry per AuBrWh_* slot; the wbr_* macros below name each slot */
struct dentry *wbr_wh[AuBrWh_Last];
atomic_t wbr_wh_running;
#define wbr_whbase wbr_wh[AuBrWh_BASE] /* whiteout base */
#define wbr_plink wbr_wh[AuBrWh_PLINK] /* pseudo-link dir */
#define wbr_orph wbr_wh[AuBrWh_ORPH] /* dir for orphans */
/* mfs mode */
unsigned long long wbr_bytes;
};
/* ext2 has 3 types of operations at least, ext3 has 4 */
/* length of au_branch.br_dykey[]: AuDyLast * 4 entries */
#define AuBrDynOp (AuDyLast * 4)
/* only defined when the fsnotify back-end (CONFIG_AUFS_HFSNOTIFY) is built */
#ifdef CONFIG_AUFS_HFSNOTIFY
/* support for asynchronous destruction */
struct au_br_hfsnotify {
/* fsnotify group referenced by the owning branch (au_branch.br_hfsn) */
struct fsnotify_group *hfsn_group;
};
#endif
/* sysfs entries */
/* one sysfs attribute together with the storage for its name */
struct au_brsysfs {
char name[16];
struct attribute attr;
};
/* indices into au_branch.br_sysfs[]; AuBrSysfs_Last is the array length */
enum {
AuBrSysfs_BR,
AuBrSysfs_BRID,
AuBrSysfs_Last
};
/* protected by superblock rwsem */
/* one member directory (branch) of an aufs mount */
struct au_branch {
/* xino state; may be NULL, au_xino_get()/au_xino_count() test for that */
struct au_xino *br_xino;
/* branch id, returned by au_sbr_id() */
aufs_bindex_t br_id;
/* permission bits, passed to au_br_writable()/au_br_whable() */
int br_perm;
/* mount and dentry of the branch, see au_br_mnt()/au_br_dentry() */
struct path br_path;
/* NOTE(review): presumably guards br_dykey[] -- confirm in dynop.c */
spinlock_t br_dykey_lock;
struct au_dykey *br_dykey[AuBrDynOp];
au_lcnt_t br_nfiles; /* opened files */
au_lcnt_t br_count; /* in-use for other */
/* members for a writable branch, see struct au_wbr */
struct au_wbr *br_wbr;
struct au_br_fhsm *br_fhsm;
#ifdef CONFIG_AUFS_HFSNOTIFY
struct au_br_hfsnotify *br_hfsn;
#endif
#ifdef CONFIG_SYSFS
/* entries under sysfs per mount-point */
struct au_brsysfs br_sysfs[AuBrSysfs_Last];
#endif
#ifdef CONFIG_DEBUG_FS
/* debugfs entry of this branch, managed by dbgaufs.c */
struct dentry *br_dbgaufs; /* xino */
#endif
struct au_dr_br br_dirren;
};
/* ---------------------------------------------------------------------- */
static inline struct vfsmount *au_br_mnt(struct au_branch *br)
{
return br->br_path.mnt;
}
static inline struct dentry *au_br_dentry(struct au_branch *br)
{
return br->br_path.dentry;
}
static inline struct super_block *au_br_sb(struct au_branch *br)
{
return au_br_mnt(br)->mnt_sb;
}
/*
 * Returns -EROFS when the branch cannot be written, either because its
 * super_block is read-only or because its permission is not writable;
 * returns 0 otherwise.
 */
static inline int au_br_rdonly(struct au_branch *br)
{
	if (sb_rdonly(au_br_sb(br)))
		return -EROFS;
	if (!au_br_writable(br->br_perm))
		return -EROFS;
	return 0;
}
/*
 * Returns 1 when CONFIG_AUFS_HNOTIFY is enabled and @brperm does not have
 * AuBrPerm_RR set, 0 in every other case.
 */
static inline int au_br_hnotifyable(int brperm __maybe_unused)
{
	int ret = 0;

#ifdef CONFIG_AUFS_HNOTIFY
	if (!(brperm & AuBrPerm_RR))
		ret = 1;
#endif
	return ret;
}
/*
 * Returns -EACCES when @oflag asks for execution (__FMODE_EXEC) while the
 * path of the branch is noexec, 0 otherwise.
 */
static inline int au_br_test_oflag(int oflag, struct au_branch *br)
{
	/* only an exec-open needs the noexec test */
	if (!(oflag & __FMODE_EXEC))
		return 0;
	if (unlikely(path_noexec(&br->br_path)))
		return -EACCES;
	return 0;
}
/* take a reference on the xino of @br; does nothing when there is none */
static inline void au_xino_get(struct au_branch *br)
{
	struct au_xino *xino = br->br_xino;

	if (!xino)
		return;
	kref_get(&xino->xi_kref);
}
/* current reference count of the xino of @br, or 0 when there is none */
static inline int au_xino_count(struct au_branch *br)
{
	struct au_xino *xino = br->br_xino;

	return xino ? kref_read(&xino->xi_kref) : 0;
}
/* ---------------------------------------------------------------------- */
/* branch.c */
struct au_sbinfo;
void au_br_free(struct au_sbinfo *sinfo);
int au_br_index(struct super_block *sb, aufs_bindex_t br_id);
struct au_opt_add;
int au_br_add(struct super_block *sb, struct au_opt_add *add, int remount);
struct au_opt_del;
int au_br_del(struct super_block *sb, struct au_opt_del *del, int remount);
long au_ibusy_ioctl(struct file *file, unsigned long arg);
#ifdef CONFIG_COMPAT
long au_ibusy_compat_ioctl(struct file *file, unsigned long arg);
#endif
struct au_opt_mod;
int au_br_mod(struct super_block *sb, struct au_opt_mod *mod, int remount,
int *do_refresh);
struct aufs_stfs;
int au_br_stfs(struct au_branch *br, struct aufs_stfs *stfs);
/* xino.c */
/* LLONG_MAX typed as loff_t */
static const loff_t au_loff_max = LLONG_MAX;
aufs_bindex_t au_xi_root(struct super_block *sb, struct dentry *dentry);
struct file *au_xino_create(struct super_block *sb, char *fpath, int silent,
int wbrtop);
struct file *au_xino_create2(struct super_block *sb, struct path *base,
struct file *copy_src);
/* argument bundle handed to au_xi_new() */
struct au_xi_new {
struct au_xino *xi; /* switch between xino and xigen */
/* NOTE(review): presumably an index into xi->xi_file[] -- confirm in xino.c */
int idx;
struct path *base;
struct file *copy_src;
};
struct file *au_xi_new(struct super_block *sb, struct au_xi_new *xinew);
int au_xino_read(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
ino_t *ino);
int au_xino_write(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
ino_t ino);
ssize_t xino_fread(vfs_readf_t func, struct file *file, void *buf, size_t size,
loff_t *pos);
ssize_t xino_fwrite(vfs_writef_t func, struct file *file, void *buf,
size_t size, loff_t *pos);
int au_xib_trunc(struct super_block *sb);
int au_xino_trunc(struct super_block *sb, aufs_bindex_t bindex, int idx_begin);
struct au_xino *au_xino_alloc(unsigned int nfile);
int au_xino_put(struct au_branch *br);
struct file *au_xino_file1(struct au_xino *xi);
struct au_opt_xino;
void au_xino_clr(struct super_block *sb);
int au_xino_set(struct super_block *sb, struct au_opt_xino *xiopt, int remount);
struct file *au_xino_def(struct super_block *sb);
int au_xino_init_br(struct super_block *sb, struct au_branch *br, ino_t hino,
struct path *base);
ino_t au_xino_new_ino(struct super_block *sb);
void au_xino_delete_inode(struct inode *inode, const int unlinked);
void au_xinondir_leave(struct super_block *sb, aufs_bindex_t bindex,
ino_t h_ino, int idx);
int au_xinondir_enter(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
int *idx);
int au_xino_path(struct seq_file *seq, struct file *file);
/* ---------------------------------------------------------------------- */
/* @idx is signed to accept -1 meaning the first file */
/*
 * Returns the @idx'th xino file of @xi.  A negative @idx selects whatever
 * au_xino_file1() returns.  NULL is returned when @xi is NULL or when @idx
 * is not below xi_nfile.
 */
static inline struct file *au_xino_file(struct au_xino *xi, int idx)
{
	if (!xi)
		return NULL;
	if (idx < 0)
		return au_xino_file1(xi);
	if (idx < xi->xi_nfile)
		return xi->xi_file[idx];
	return NULL;
}
/* ---------------------------------------------------------------------- */
/* Superblock to branch */
/* id of the branch at @bindex in the aufs super_block @sb */
static inline
aufs_bindex_t au_sbr_id(struct super_block *sb, aufs_bindex_t bindex)
{
	struct au_branch *br = au_sbr(sb, bindex);

	return br->br_id;
}
/* vfsmount of the branch at @bindex in the aufs super_block @sb */
static inline
struct vfsmount *au_sbr_mnt(struct super_block *sb, aufs_bindex_t bindex)
{
	struct au_branch *br = au_sbr(sb, bindex);

	return au_br_mnt(br);
}
/* super_block of the branch at @bindex in the aufs super_block @sb */
static inline
struct super_block *au_sbr_sb(struct super_block *sb, aufs_bindex_t bindex)
{
	struct au_branch *br = au_sbr(sb, bindex);

	return au_br_sb(br);
}
/* permission bits of the branch at @bindex in the aufs super_block @sb */
static inline int au_sbr_perm(struct super_block *sb, aufs_bindex_t bindex)
{
	struct au_branch *br = au_sbr(sb, bindex);

	return br->br_perm;
}
/* result of au_br_whable() for the permission of the branch at @bindex */
static inline int au_sbr_whable(struct super_block *sb, aufs_bindex_t bindex)
{
	int perm = au_sbr_perm(sb, bindex);

	return au_br_whable(perm);
}
/* ---------------------------------------------------------------------- */
/*
 * Locking helpers for au_wbr.wbr_wh_rwsem.  Each one forwards to the au_rw_*
 * primitive of the same flavour, passing the address of the rwsem embedded
 * in @wbr.
 */
#define wbr_wh_read_lock(wbr) au_rw_read_lock(&(wbr)->wbr_wh_rwsem)
#define wbr_wh_write_lock(wbr) au_rw_write_lock(&(wbr)->wbr_wh_rwsem)
#define wbr_wh_read_trylock(wbr) au_rw_read_trylock(&(wbr)->wbr_wh_rwsem)
#define wbr_wh_write_trylock(wbr) au_rw_write_trylock(&(wbr)->wbr_wh_rwsem)
/*
#define wbr_wh_read_trylock_nested(wbr) \
au_rw_read_trylock_nested(&(wbr)->wbr_wh_rwsem)
#define wbr_wh_write_trylock_nested(wbr) \
au_rw_write_trylock_nested(&(wbr)->wbr_wh_rwsem)
*/
#define wbr_wh_read_unlock(wbr) au_rw_read_unlock(&(wbr)->wbr_wh_rwsem)
#define wbr_wh_write_unlock(wbr) au_rw_write_unlock(&(wbr)->wbr_wh_rwsem)
#define wbr_wh_downgrade_lock(wbr) au_rw_dgrade_lock(&(wbr)->wbr_wh_rwsem)
/* the same forwarding for the AuRwMust*() checks */
#define WbrWhMustNoWaiters(wbr) AuRwMustNoWaiters(&(wbr)->wbr_wh_rwsem)
#define WbrWhMustAnyLock(wbr) AuRwMustAnyLock(&(wbr)->wbr_wh_rwsem)
#define WbrWhMustWriteLock(wbr) AuRwMustWriteLock(&(wbr)->wbr_wh_rwsem)
/* ---------------------------------------------------------------------- */
#ifdef CONFIG_AUFS_FHSM
/* put @brfhsm into its initial state: counters zeroed, mutex ready to use */
static inline void au_br_fhsm_init(struct au_br_fhsm *brfhsm)
{
	brfhsm->bf_readable = 0;
	brfhsm->bf_jiffy = 0;
	mutex_init(&brfhsm->bf_lock);
}

/* counterpart of au_br_fhsm_init(): tear down the mutex */
static inline void au_br_fhsm_fin(struct au_br_fhsm *brfhsm)
{
	mutex_destroy(&brfhsm->bf_lock);
}
#else
/* FHSM is compiled out: both helpers become empty inline functions */
AuStubVoid(au_br_fhsm_init, struct au_br_fhsm *brfhsm)
AuStubVoid(au_br_fhsm_fin, struct au_br_fhsm *brfhsm)
#endif
#endif /* __KERNEL__ */
#endif /* __AUFS_BRANCH_H__ */

40
fs/aufs/conf.mk Normal file
View File

@ -0,0 +1,40 @@
# SPDX-License-Identifier: GPL-2.0
# Generates ${obj}/conf.str, a list of the CONFIG_AUFS_* settings in effect,
# and makes sysfs.o depend on it.
# AuConfStr collects "NAME=value" words, starting with CONFIG_AUFS_FS itself.
AuConfStr = CONFIG_AUFS_FS=${CONFIG_AUFS_FS}
# $(call AuConf,NAME): appends "NAME=<its value>" when NAME is defined.
define AuConf
ifdef ${1}
AuConfStr += ${1}=${${1}}
endif
endef
# option names (without the CONFIG_AUFS_ prefix) to be recorded
AuConfAll = BRANCH_MAX_127 BRANCH_MAX_511 BRANCH_MAX_1023 BRANCH_MAX_32767 \
SBILIST \
HNOTIFY HFSNOTIFY \
EXPORT INO_T_64 \
XATTR \
FHSM \
RDU \
DIRREN \
SHWH \
BR_RAMFS \
BR_FUSE POLL \
BR_HFSPLUS \
BDEV_LOOP \
DEBUG MAGIC_SYSRQ
$(foreach i, ${AuConfAll}, \
$(eval $(call AuConf,CONFIG_AUFS_${i})))
AuConfName = ${obj}/conf.str
# Always regenerated (FORCE): one word of AuConfStr per line, each wrapped
# as "<word>\n" by the sed expressions.
${AuConfName}.tmp: FORCE
@echo ${AuConfStr} | tr ' ' '\n' | sed -e 's/^/"/' -e 's/$$/\\n"/' > $@
# conf.str is replaced only when diff reports a difference from the .tmp file
${AuConfName}: ${AuConfName}.tmp
@diff -q $< $@ > /dev/null 2>&1 || { \
echo ' GEN ' $@; \
cp -p $< $@; \
}
FORCE:
clean-files += ${AuConfName} ${AuConfName}.tmp
${obj}/sysfs.o: ${AuConfName}
# optional: the leading '-' makes make carry on when conf_priv.mk is absent
-include ${srctree}/${src}/conf_priv.mk

1458
fs/aufs/cpup.c Normal file

File diff suppressed because it is too large Load Diff

100
fs/aufs/cpup.h Normal file
View File

@ -0,0 +1,100 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2005-2020 Junjiro R. Okajima
*
* This program, aufs is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* copy-up/down functions
*/
#ifndef __AUFS_CPUP_H__
#define __AUFS_CPUP_H__
#ifdef __KERNEL__
#include <linux/path.h>
struct inode;
struct file;
struct au_pin;
void au_cpup_attr_flags(struct inode *dst, unsigned int iflags);
void au_cpup_attr_timesizes(struct inode *inode);
void au_cpup_attr_nlink(struct inode *inode, int force);
void au_cpup_attr_changeable(struct inode *inode);
void au_cpup_igen(struct inode *inode, struct inode *h_inode);
void au_cpup_attr_all(struct inode *inode, int force);
/* ---------------------------------------------------------------------- */
/* argument bundle taken by the au_sio_cpup_*()/au_sio_cpdown_*() functions */
struct au_cp_generic {
struct dentry *dentry;
/* NOTE(review): presumably destination and source branch index -- confirm */
aufs_bindex_t bdst, bsrc;
loff_t len;
struct au_pin *pin;
/* AuCpup_* bits, handled with au_ftest_cpup()/au_fset_cpup()/au_fclr_cpup() */
unsigned int flags;
};
/* cpup flags */
#define AuCpup_DTIME 1 /* do dtime_store/revert */
#define AuCpup_KEEPLINO (1 << 1) /* do not clear the lower xino,
for link(2) */
#define AuCpup_RENAME (1 << 2) /* rename after cpup */
#define AuCpup_HOPEN (1 << 3) /* call h_open_pre/post() in
cpup */
#define AuCpup_OVERWRITE (1 << 4) /* allow overwriting the
existing entry */
#define AuCpup_RWDST (1 << 5) /* force write target even if
the branch is marked as RO */
/*
 * Without hfsplus branch support AuCpup_HOPEN becomes 0, so testing it is
 * always false and setting or clearing it changes nothing.
 */
#ifndef CONFIG_AUFS_BR_HFSPLUS
#undef AuCpup_HOPEN
#define AuCpup_HOPEN 0
#endif
/* test, set or clear AuCpup_<name> in @flags; @name is the bare suffix */
#define au_ftest_cpup(flags, name) ((flags) & AuCpup_##name)
#define au_fset_cpup(flags, name) \
do { (flags) |= AuCpup_##name; } while (0)
#define au_fclr_cpup(flags, name) \
do { (flags) &= ~AuCpup_##name; } while (0)
int au_copy_file(struct file *dst, struct file *src, loff_t len);
int au_sio_cpup_simple(struct au_cp_generic *cpg);
int au_sio_cpdown_simple(struct au_cp_generic *cpg);
int au_sio_cpup_wh(struct au_cp_generic *cpg, struct file *file);
int au_cp_dirs(struct dentry *dentry, aufs_bindex_t bdst,
int (*cp)(struct dentry *dentry, aufs_bindex_t bdst,
struct au_pin *pin,
struct dentry *h_parent, void *arg),
void *arg);
int au_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst);
int au_test_and_cpup_dirs(struct dentry *dentry, aufs_bindex_t bdst);
/* ---------------------------------------------------------------------- */
/* keep timestamps when copyup */
/* state handed between au_dtime_store() and au_dtime_revert() */
struct au_dtime {
struct dentry *dt_dentry;
struct path dt_h_path;
/* access and modification time stamps */
struct timespec64 dt_atime, dt_mtime;
};
void au_dtime_store(struct au_dtime *dt, struct dentry *dentry,
struct path *h_path);
void au_dtime_revert(struct au_dtime *dt);
#endif /* __KERNEL__ */
#endif /* __AUFS_CPUP_H__ */

526
fs/aufs/dbgaufs.c Normal file
View File

@ -0,0 +1,526 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2005-2020 Junjiro R. Okajima
*
* This program, aufs is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* debugfs interface
*/
#include <linux/debugfs.h>
#include <linux/err.h>
#include "aufs.h"
#ifndef CONFIG_SYSFS
#error DEBUG_FS depends upon SYSFS
#endif
/* parent debugfs directory; dbgaufs_si_init() creates one dir per sb in it */
static struct dentry *dbgaufs;
/* mode of every debugfs file created in this file: read-only for all */
static const mode_t dbgaufs_mode = 0444;
/* 20 is max digits length of ulong 64 */
/* text prepared at open time and handed out by dbgaufs_xi_read() */
struct dbgaufs_arg {
/* number of bytes of a[] to expose, as returned by snprintf() */
int n;
char a[20 * 4];
};
/*
* common function for all XINO files
*/
/* release handler shared by the XINO files: frees the text buffer, if any */
static int dbgaufs_xi_release(struct inode *inode __maybe_unused,
			      struct file *file)
{
	void *arg = file->private_data;

	if (!arg)
		return 0;

	/* this is struct dbgaufs_arg */
	AuDebugOn(!au_kfree_sz_test(arg));
	au_kfree_do_rcu(arg);
	return 0;
}
/*
 * Common open helper of the XINO files.
 *
 * Allocates a struct dbgaufs_arg, hangs it on @file->private_data and, when
 * @xf is not NULL, fills it with "<blocks>x<blksize> <size>" taken from the
 * attributes of @xf, prefixed by "<cnt>, " when @do_fcnt is set.  A failing
 * getattr is reported as the text "err <errno>" rather than as an open error.
 *
 * Returns 0, or -ENOMEM when the allocation fails.
 */
static int dbgaufs_xi_open(struct file *xf, struct file *file, int do_fcnt,
			   int cnt)
{
	struct dbgaufs_arg *arg;
	struct kstat st;
	int err;

	arg = kmalloc(sizeof(*arg), GFP_NOFS);
	if (unlikely(!arg))
		return -ENOMEM;

	arg->n = 0;
	file->private_data = arg;
	if (!xf)
		return 0;

	err = vfsub_getattr(&xf->f_path, &st);
	if (err) {
		arg->n = snprintf(arg->a, sizeof(arg->a), "err %d\n", err);
		return 0;
	}

	if (do_fcnt)
		arg->n = snprintf(arg->a, sizeof(arg->a),
				  "%d, %llux%u %lld\n",
				  cnt, st.blocks, st.blksize,
				  (long long)st.size);
	else
		arg->n = snprintf(arg->a, sizeof(arg->a), "%llux%u %lld\n",
				  st.blocks, st.blksize,
				  (long long)st.size);
	AuDebugOn(arg->n >= sizeof(arg->a));
	return 0;
}
/* read handler shared by the XINO files: copies out the prepared text */
static ssize_t dbgaufs_xi_read(struct file *file, char __user *buf,
			       size_t count, loff_t *ppos)
{
	struct dbgaufs_arg *arg = file->private_data;

	return simple_read_from_buffer(buf, count, ppos, arg->a, arg->n);
}
/* ---------------------------------------------------------------------- */
/*
 * Text buffer of the "plink" file.  dbgaufs_plink_open() places it on a
 * single zeroed page, so a[] may use PAGE_SIZE - sizeof(n) bytes.
 */
struct dbgaufs_plink_arg {
/* number of bytes of a[] filled in so far */
int n;
char a[];
};
/* release handler of the "plink" file: gives back the page taken at open */
static int dbgaufs_plink_release(struct inode *inode __maybe_unused,
				 struct file *file)
{
	unsigned long page = (unsigned long)file->private_data;

	free_page(page);
	return 0;
}
/*
 * Open handler of the debugfs "plink" file.
 *
 * Builds the whole file content on one zeroed page and stores it in
 * @file->private_data.  With the PLINK mount flag set the text is
 *	<number of hash buckets>\n
 *	<entries in bucket 0> <entries in bucket 1> ...\n
 *	<sum of all entries>\n
 * otherwise it is the fixed string "1\n0\n0\n".
 *
 * Returns 0 on success, -ENOMEM when no page is available and -EFBIG when
 * the text does not fit into the page.  The superblock read lock is dropped
 * on every path which took it, including the -EFBIG one.
 */
static int dbgaufs_plink_open(struct inode *inode, struct file *file)
{
	int err, i, limit;
	unsigned long n, sum;
	struct dbgaufs_plink_arg *p;
	struct au_sbinfo *sbinfo;
	struct super_block *sb;
	struct hlist_bl_head *hbl;

	err = -ENOMEM;
	p = (void *)get_zeroed_page(GFP_NOFS);
	if (unlikely(!p))
		goto out;

	err = -EFBIG;
	sbinfo = inode->i_private;
	sb = sbinfo->si_sb;
	si_noflush_read_lock(sb);
	if (au_opt_test(au_mntflags(sb), PLINK)) {
		limit = PAGE_SIZE - sizeof(p->n);

		/* the number of buckets */
		n = snprintf(p->a + p->n, limit, "%d\n", AuPlink_NHASH);
		p->n += n;
		limit -= n;

		sum = 0;
		for (i = 0, hbl = sbinfo->si_plink; i < AuPlink_NHASH;
		     i++, hbl++) {
			n = au_hbl_count(hbl);
			sum += n;

			n = snprintf(p->a + p->n, limit, "%lu ", n);
			p->n += n;
			limit -= n;
			if (unlikely(limit <= 0))
				goto out_unlock;
		}
		/* turn the trailing blank into the end of the line */
		p->a[p->n - 1] = '\n';

		/* the sum of plinks */
		n = snprintf(p->a + p->n, limit, "%lu\n", sum);
		p->n += n;
		limit -= n;
		if (unlikely(limit <= 0))
			goto out_unlock;
	} else {
#define str "1\n0\n0\n"
		p->n = sizeof(str) - 1;
		strcpy(p->a, str);
#undef str
	}
	si_read_unlock(sb);

	err = 0;
	file->private_data = p;
	goto out; /* success */

out_unlock:
	/* the lock is still held here and must not leak with the error */
	si_read_unlock(sb);
	free_page((unsigned long)p);
out:
	return err;
}
/* read handler of the "plink" file: copies out the text built at open */
static ssize_t dbgaufs_plink_read(struct file *file, char __user *buf,
				  size_t count, loff_t *ppos)
{
	struct dbgaufs_plink_arg *arg = file->private_data;

	return simple_read_from_buffer(buf, count, ppos, arg->a, arg->n);
}
/* file operations of the per-superblock "plink" debugfs file */
static const struct file_operations dbgaufs_plink_fop = {
	.owner		= THIS_MODULE,
	.open		= dbgaufs_plink_open,
	.read		= dbgaufs_plink_read,
	.release	= dbgaufs_plink_release,
};
/* ---------------------------------------------------------------------- */
/*
 * Open handler of the "xib" file: describes sbinfo->si_xib through
 * dbgaufs_xi_open() while holding the superblock read lock.
 */
static int dbgaufs_xib_open(struct inode *inode, struct file *file)
{
	struct au_sbinfo *sbinfo = inode->i_private;
	struct super_block *sb = sbinfo->si_sb;
	int err;

	si_noflush_read_lock(sb);
	err = dbgaufs_xi_open(sbinfo->si_xib, file, /*do_fcnt*/0, /*cnt*/0);
	si_read_unlock(sb);

	return err;
}
/* file operations of the per-superblock "xib" debugfs file */
static const struct file_operations dbgaufs_xib_fop = {
	.owner		= THIS_MODULE,
	.open		= dbgaufs_xib_open,
	.read		= dbgaufs_xi_read,
	.release	= dbgaufs_xi_release,
};
/* ---------------------------------------------------------------------- */
#define DbgaufsXi_PREFIX "xi"
/*
 * Open handler of the per-branch xino files, which dbgaufs_br_do_add() names
 * "xi<bindex>" or "xi<bindex>-<idx>".
 *
 * The branch index and the optional file index are parsed back out of the
 * dentry name, then the matching xino file is described through
 * dbgaufs_xi_open() together with the reference count of the branch's xino.
 *
 * Returns 0 on success, -ENOENT when the name lacks the prefix, an index is
 * out of range or the slot holds no file, or the error from kstrtol() or
 * dbgaufs_xi_open().
 */
static int dbgaufs_xino_open(struct inode *inode, struct file *file)
{
int err, idx;
long l;
aufs_bindex_t bindex;
char *p, a[sizeof(DbgaufsXi_PREFIX) + 8];
struct au_sbinfo *sbinfo;
struct super_block *sb;
struct au_xino *xi;
struct file *xf;
struct qstr *name;
struct au_branch *br;
err = -ENOENT;
name = &file->f_path.dentry->d_name;
/* the name must start with the prefix and carry at least one more char */
if (unlikely(name->len < sizeof(DbgaufsXi_PREFIX)
|| memcmp(name->name, DbgaufsXi_PREFIX,
sizeof(DbgaufsXi_PREFIX) - 1)))
goto out;
AuDebugOn(name->len >= sizeof(a));
/* work on a NUL-terminated private copy of the name */
memcpy(a, name->name, name->len);
a[name->len] = '\0';
/* cut the copy at '-' so that the branch index parses on its own */
p = strchr(a, '-');
if (p)
*p = '\0';
err = kstrtol(a + sizeof(DbgaufsXi_PREFIX) - 1, 10, &l);
if (unlikely(err))
goto out;
bindex = l;
/* the file index defaults to 0 when the name has no "-<idx>" part */
idx = 0;
if (p) {
err = kstrtol(p + 1, 10, &l);
if (unlikely(err))
goto out;
idx = l;
}
err = -ENOENT;
sbinfo = inode->i_private;
sb = sbinfo->si_sb;
si_noflush_read_lock(sb);
if (unlikely(bindex < 0 || bindex > au_sbbot(sb)))
goto out_si;
br = au_sbr(sb, bindex);
xi = br->br_xino;
/* NOTE(review): xi is dereferenced without a NULL test -- confirm callers */
if (unlikely(idx >= xi->xi_nfile))
goto out_si;
xf = au_xino_file(xi, idx);
/* err stays -ENOENT when the slot holds no file */
if (xf)
err = dbgaufs_xi_open(xf, file, /*do_fcnt*/1,
au_xino_count(br));
out_si:
si_read_unlock(sb);
out:
AuTraceErr(err);
return err;
}
/* file operations of the per-branch "xi<bindex>[-<idx>]" debugfs files */
static const struct file_operations dbgaufs_xino_fop = {
	.owner		= THIS_MODULE,
	.open		= dbgaufs_xino_open,
	.read		= dbgaufs_xi_read,
	.release	= dbgaufs_xi_release,
};
/* remove the debugfs entry of the branch @br, if it has one */
void dbgaufs_xino_del(struct au_branch *br)
{
	struct dentry *entry = br->br_dbgaufs;

	if (entry) {
		br->br_dbgaufs = NULL;

		/* debugfs acquires the parent i_mutex */
		lockdep_off();
		debugfs_remove(entry);
		lockdep_on();
	}
}
/*
 * Remove the debugfs entries of every branch from @bindex to the bottom
 * branch of @sb.  Does nothing when @sb has no debugfs directory.
 */
void dbgaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex)
{
	aufs_bindex_t bbot;

	if (!au_sbi(sb)->si_dbgaufs)
		return;

	bbot = au_sbbot(sb);
	while (bindex <= bbot) {
		dbgaufs_xino_del(au_sbr(sb, bindex));
		bindex++;
	}
}
/*
 * Create or rename the debugfs entry of one xino file of a branch.
 *
 * The entry is named "xi<bindex>" for @idx 0 and "xi<bindex>-<idx>"
 * otherwise.  When the branch already owns an entry under another name it is
 * renamed, when it owns none a new file is created under @parent with
 * @sbinfo as its private data.
 *
 * Failures are only logged.  debugfs reports them as ERR_PTR() on current
 * kernels and as NULL on old ones, so both are tested; an error pointer is
 * never kept in br->br_dbgaufs, where a later call would dereference it.
 */
static void dbgaufs_br_do_add(struct super_block *sb, aufs_bindex_t bindex,
			      unsigned int idx, struct dentry *parent,
			      struct au_sbinfo *sbinfo)
{
	struct au_branch *br;
	struct dentry *d;
	/* "xi" bindex(5) "-" idx(2) NULL */
	char name[sizeof(DbgaufsXi_PREFIX) + 8];

	if (!idx)
		snprintf(name, sizeof(name), DbgaufsXi_PREFIX "%d", bindex);
	else
		snprintf(name, sizeof(name), DbgaufsXi_PREFIX "%d-%u",
			 bindex, idx);
	br = au_sbr(sb, bindex);
	if (br->br_dbgaufs) {
		struct qstr qstr = QSTR_INIT(name, strlen(name));

		if (!au_qstreq(&br->br_dbgaufs->d_name, &qstr)) {
			/* debugfs acquires the parent i_mutex */
			lockdep_off();
			d = debugfs_rename(parent, br->br_dbgaufs, parent,
					   name);
			lockdep_on();
			if (IS_ERR_OR_NULL(d))
				pr_warn("failed renaming %pd/%s, ignored.\n",
					parent, name);
		}
	} else {
		lockdep_off();
		d = debugfs_create_file(name, dbgaufs_mode, parent,
					sbinfo, &dbgaufs_xino_fop);
		lockdep_on();
		if (IS_ERR_OR_NULL(d)) {
			pr_warn("failed creating %pd/%s, ignored.\n",
				parent, name);
			d = NULL;
		}
		br->br_dbgaufs = d;
	}
}
/* call dbgaufs_br_do_add() once per xino file of the branch at @bindex */
static void dbgaufs_br_add(struct super_block *sb, aufs_bindex_t bindex,
			   struct dentry *parent, struct au_sbinfo *sbinfo)
{
	struct au_xino *xi = au_sbr(sb, bindex)->br_xino;
	unsigned int idx = 0;

	while (idx < xi->xi_nfile) {
		dbgaufs_br_do_add(sb, bindex, idx, parent, sbinfo);
		idx++;
	}
}
/*
 * Add the debugfs entries of every branch from @bindex to the bottom branch
 * of @sb, walking downwards when @topdown is set and upwards from the bottom
 * otherwise.  Does nothing without the XINO mount flag or without a debugfs
 * directory for @sb.
 */
void dbgaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex, int topdown)
{
	struct au_sbinfo *sbinfo;
	struct dentry *parent;
	aufs_bindex_t bbot, b;

	if (!au_opt_test(au_mntflags(sb), XINO))
		return;

	sbinfo = au_sbi(sb);
	parent = sbinfo->si_dbgaufs;
	if (!parent)
		return;

	bbot = au_sbbot(sb);
	if (topdown) {
		for (b = bindex; b <= bbot; b++)
			dbgaufs_br_add(sb, b, parent, sbinfo);
	} else {
		for (b = bbot; b >= bindex; b--)
			dbgaufs_br_add(sb, b, parent, sbinfo);
	}
}
/* ---------------------------------------------------------------------- */
#ifdef CONFIG_AUFS_EXPORT
/*
 * Open handler of the "xigen" file: describes sbinfo->si_xigen through
 * dbgaufs_xi_open() while holding the superblock read lock.
 */
static int dbgaufs_xigen_open(struct inode *inode, struct file *file)
{
	struct au_sbinfo *sbinfo = inode->i_private;
	struct super_block *sb = sbinfo->si_sb;
	int err;

	si_noflush_read_lock(sb);
	err = dbgaufs_xi_open(sbinfo->si_xigen, file, /*do_fcnt*/0, /*cnt*/0);
	si_read_unlock(sb);

	return err;
}
/*
 * File operations of the debugfs "xigen" entry: only open() is specific to
 * xigen, read() and release() are the shared dbgaufs_xi_* handlers.
 */
static const struct file_operations dbgaufs_xigen_fop = {
.owner = THIS_MODULE,
.open = dbgaufs_xigen_open,
.release = dbgaufs_xi_release,
.read = dbgaufs_xi_read
};
/*
 * Create the "xigen" file inside the per-superblock debugfs directory and
 * remember its dentry in si_dbgaufs_xigen.
 *
 * Returns 0 when a dentry pointer was stored, -EIO when it is NULL.
 */
static int dbgaufs_xigen_init(struct au_sbinfo *sbinfo)
{
	/*
	 * Acts like a dynamic '__init' routine, hence si_rwsem is not
	 * asserted here:
	 * AuRwMustWriteLock(&sbinfo->si_rwsem);
	 */
	sbinfo->si_dbgaufs_xigen = debugfs_create_file("xigen", dbgaufs_mode,
						       sbinfo->si_dbgaufs,
						       sbinfo,
						       &dbgaufs_xigen_fop);

	return sbinfo->si_dbgaufs_xigen ? 0 : -EIO;
}
#else
static int dbgaufs_xigen_init(struct au_sbinfo *sbinfo)
{
return 0;
}
#endif /* CONFIG_AUFS_EXPORT */
/* ---------------------------------------------------------------------- */
/*
 * Remove the per-superblock debugfs directory, recursively, and clear the
 * directory pointer in @sbinfo.
 *
 * NOTE(review): si_dbgaufs_plink, si_dbgaufs_xib and si_dbgaufs_xigen are
 * not reset here -- confirm nothing dereferences them afterwards.
 */
void dbgaufs_si_fin(struct au_sbinfo *sbinfo)
{
/*
* This function is a dynamic '__fin' function actually,
* so the tiny check for si_rwsem is unnecessary.
*/
/* AuRwMustWriteLock(&sbinfo->si_rwsem); */
debugfs_remove_recursive(sbinfo->si_dbgaufs);
sbinfo->si_dbgaufs = NULL;
}
/*
 * Create the per-superblock debugfs directory and its fixed entries
 * ("plink", "xib" and, via dbgaufs_xigen_init(), "xigen").
 *
 * Returns 0 on success.  Returns -ENOENT when the top-level aufs debugfs
 * directory has not been set up, and -EIO when one of the creations is
 * detected as failed; in the latter case everything created so far is
 * removed again through dbgaufs_si_fin().
 *
 * NOTE(review): failures are detected by comparing the returned dentry with
 * NULL.  On kernels where debugfs_create_dir()/debugfs_create_file() report
 * failure with an ERR_PTR value these checks never fire -- confirm against
 * the target kernel before relying on the error path.
 */
int dbgaufs_si_init(struct au_sbinfo *sbinfo)
{
int err;
char name[SysaufsSiNameLen];
/*
* This function is a dynamic '__init' function actually,
* so the tiny check for si_rwsem is unnecessary.
*/
/* AuRwMustWriteLock(&sbinfo->si_rwsem); */
err = -ENOENT;
if (!dbgaufs) {
AuErr1("/debug/aufs is uninitialized\n");
goto out;
}
err = -EIO;
/* the directory name is produced by sysaufs_name() */
sysaufs_name(sbinfo, name);
sbinfo->si_dbgaufs = debugfs_create_dir(name, dbgaufs);
if (unlikely(!sbinfo->si_dbgaufs))
goto out;
/* regardless plink/noplink option */
sbinfo->si_dbgaufs_plink = debugfs_create_file
("plink", dbgaufs_mode, sbinfo->si_dbgaufs, sbinfo,
&dbgaufs_plink_fop);
if (unlikely(!sbinfo->si_dbgaufs_plink))
goto out_dir;
/* regardless xino/noxino option */
sbinfo->si_dbgaufs_xib = debugfs_create_file
("xib", dbgaufs_mode, sbinfo->si_dbgaufs, sbinfo,
&dbgaufs_xib_fop);
if (unlikely(!sbinfo->si_dbgaufs_xib))
goto out_dir;
err = dbgaufs_xigen_init(sbinfo);
if (!err)
goto out; /* success */
/* failure after the directory exists: remove it and all children */
out_dir:
dbgaufs_si_fin(sbinfo);
out:
if (unlikely(err))
pr_err("debugfs/aufs failed\n");
return err;
}
/* ---------------------------------------------------------------------- */
/* Remove the top-level aufs debugfs directory created by dbgaufs_init(). */
void dbgaufs_fin(void)
{
debugfs_remove(dbgaufs);
}
/*
 * Create the top-level aufs directory at the debugfs root and keep its
 * dentry in the file-scope 'dbgaufs' pointer.
 *
 * Returns 0 when a dentry pointer was stored, -EIO when it is NULL.
 */
int __init dbgaufs_init(void)
{
	dbgaufs = debugfs_create_dir(AUFS_NAME, NULL);

	return dbgaufs ? 0 : -EIO;
}

53
fs/aufs/dbgaufs.h Normal file
View File

@ -0,0 +1,53 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2005-2020 Junjiro R. Okajima
*
* This program, aufs is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* debugfs interface
*/
#ifndef __DBGAUFS_H__
#define __DBGAUFS_H__
#ifdef __KERNEL__
struct super_block;
struct au_sbinfo;
struct au_branch;
/*
 * With CONFIG_DEBUG_FS the real implementations live in dbgaufs.c;
 * otherwise every entry point is replaced by an AuStubVoid/AuStubInt0 stub
 * so callers need no #ifdef of their own.
 */
#ifdef CONFIG_DEBUG_FS
/* dbgaufs.c */
/* per-branch xino entries */
void dbgaufs_xino_del(struct au_branch *br);
void dbgaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex);
void dbgaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex, int topdown);
/* per-superblock directory */
void dbgaufs_si_fin(struct au_sbinfo *sbinfo);
int dbgaufs_si_init(struct au_sbinfo *sbinfo);
/* top-level aufs directory */
void dbgaufs_fin(void);
int __init dbgaufs_init(void);
#else
AuStubVoid(dbgaufs_xino_del, struct au_branch *br)
AuStubVoid(dbgaufs_brs_del, struct super_block *sb, aufs_bindex_t bindex)
AuStubVoid(dbgaufs_brs_add, struct super_block *sb, aufs_bindex_t bindex,
int topdown)
AuStubVoid(dbgaufs_si_fin, struct au_sbinfo *sbinfo)
AuStubInt0(dbgaufs_si_init, struct au_sbinfo *sbinfo)
AuStubVoid(dbgaufs_fin, void)
AuStubInt0(__init dbgaufs_init, void)
#endif /* CONFIG_DEBUG_FS */
#endif /* __KERNEL__ */
#endif /* __DBGAUFS_H__ */

225
fs/aufs/dcsub.c Normal file
View File

@ -0,0 +1,225 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2005-2020 Junjiro R. Okajima
*
* This program, aufs is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* sub-routines for dentry cache
*/
#include "aufs.h"
/*
 * Drop the reference held on every dentry stored in @dpage, in storage
 * order, then release the page that backs the pointer array.
 */
static void au_dpage_free(struct au_dpage *dpage)
{
	struct dentry **cur = dpage->dentries;
	int remaining = dpage->ndentry;

	while (remaining-- > 0)
		dput(*cur++);

	free_page((unsigned long)dpage->dentries);
}
/*
 * Prepare @dpages for collecting dentries: allocate a one-element page
 * descriptor array plus the first, still empty, page of dentry pointers.
 *
 * Returns 0 on success or -ENOMEM; on failure nothing stays allocated.
 */
int au_dpages_init(struct au_dcsub_pages *dpages, gfp_t gfp)
{
	void *page;

	dpages->dpages = kmalloc(sizeof(*dpages->dpages), gfp);
	if (unlikely(!dpages->dpages))
		return -ENOMEM;

	page = (void *)__get_free_page(gfp);
	if (unlikely(!page)) {
		/* undo the descriptor array allocation */
		au_kfree_try_rcu(dpages->dpages);
		return -ENOMEM;
	}

	dpages->ndpage = 1;
	dpages->dpages[0].dentries = page;
	dpages->dpages[0].ndentry = 0;
	return 0; /* success */
}
/*
 * Release everything collected in @dpages: every page (dropping the dentry
 * references it holds) and then the page descriptor array itself.
 */
void au_dpages_free(struct au_dcsub_pages *dpages)
{
	int idx;

	for (idx = 0; idx < dpages->ndpage; idx++)
		au_dpage_free(&dpages->dpages[idx]);

	au_kfree_try_rcu(dpages->dpages);
}
/*
 * Store @dentry, with an extra reference taken by dget_dlock(), in the last
 * page of @dpages.  When that page is full the descriptor array is grown by
 * one element and a fresh page is allocated first.
 *
 * Returns 0 on success or -ENOMEM when growing fails; nothing is stored in
 * that case.
 *
 * NOTE(review): dget_dlock() suggests the caller holds dentry->d_lock;
 * au_dcsub_pages_rev() does, confirm for the d_walk() callback path.
 */
static int au_dpages_append(struct au_dcsub_pages *dpages,
struct dentry *dentry, gfp_t gfp)
{
int err, sz;
struct au_dpage *dpage;
void *p;
/* last page in use */
dpage = dpages->dpages + dpages->ndpage - 1;
/* number of dentry pointers that fit into one page */
sz = PAGE_SIZE / sizeof(dentry);
if (unlikely(dpage->ndentry >= sz)) {
AuLabel(new dpage);
err = -ENOMEM;
/* sz is reused: current size of the descriptor array in bytes */
sz = dpages->ndpage * sizeof(*dpages->dpages);
p = au_kzrealloc(dpages->dpages, sz,
sz + sizeof(*dpages->dpages), gfp,
/*may_shrink*/0);
if (unlikely(!p))
goto out;
dpages->dpages = p;
dpage = dpages->dpages + dpages->ndpage;
p = (void *)__get_free_page(gfp);
/*
 * On failure the enlarged array is kept, but ndpage is not bumped, so
 * the unused slot is never visited.
 */
if (unlikely(!p))
goto out;
dpage->ndentry = 0;
dpage->dentries = p;
dpages->ndpage++;
}
AuDebugOn(au_dcount(dentry) <= 0);
dpage->dentries[dpage->ndentry++] = dget_dlock(dentry);
return 0; /* success */
out:
return err;
}
/* todo: BAD approach */
/* copied from linux/fs/dcache.c */
/*
 * Private redeclaration of the walker's return codes and of d_walk() itself.
 * NOTE(review): being a copy, this presumably has to stay in sync with the
 * definitions in fs/dcache.c of the kernel being built -- verify on every
 * kernel update.
 */
enum d_walk_ret {
D_WALK_CONTINUE,
D_WALK_QUIT,
D_WALK_NORETRY,
D_WALK_SKIP,
};
extern void d_walk(struct dentry *parent, void *data,
enum d_walk_ret (*enter)(void *, struct dentry *));
/* Context handed through d_walk() to au_call_dpages_append(). */
struct ac_dpages_arg {
int err; /* result of the last au_dpages_append() call */
struct au_dcsub_pages *dpages; /* where matching dentries are stored */
struct super_block *sb; /* only dentries of this sb are collected */
au_dpages_test test; /* optional extra filter, may be NULL */
void *arg; /* opaque argument passed to 'test' */
};
/*
 * d_walk() callback: collect @dentry when it belongs to the wanted
 * superblock, is not a root dentry, has a positive count, carries aufs
 * dentry info and passes the optional caller-supplied test.
 *
 * The walk goes on unless storing the dentry fails; the error is then left
 * in the argument block and D_WALK_QUIT is returned.
 */
static enum d_walk_ret au_call_dpages_append(void *_arg, struct dentry *dentry)
{
	struct ac_dpages_arg *ctx = _arg;

	if (dentry->d_sb != ctx->sb)
		return D_WALK_CONTINUE;
	if (IS_ROOT(dentry))
		return D_WALK_CONTINUE;
	if (au_dcount(dentry) <= 0)
		return D_WALK_CONTINUE;
	if (!au_di(dentry))
		return D_WALK_CONTINUE;
	if (ctx->test && !ctx->test(dentry, ctx->arg))
		return D_WALK_CONTINUE;

	ctx->err = au_dpages_append(ctx->dpages, dentry, GFP_ATOMIC);
	if (unlikely(ctx->err))
		return D_WALK_QUIT;

	return D_WALK_CONTINUE;
}
/*
 * Walk the dentry tree below @root with d_walk() and gather into @dpages
 * the dentries accepted by au_call_dpages_append(), optionally filtered by
 * @test(@arg).
 *
 * Returns 0, or the error recorded by the callback.
 */
int au_dcsub_pages(struct au_dcsub_pages *dpages, struct dentry *root,
		   au_dpages_test test, void *arg)
{
	struct ac_dpages_arg walk;

	walk.err = 0;
	walk.dpages = dpages;
	walk.sb = root->d_sb;
	walk.test = test;
	walk.arg = arg;

	d_walk(root, &walk, au_call_dpages_append);

	return walk.err;
}
/*
 * Collect @dentry (only when @do_include is set) and then each of its
 * ancestors up to the root into @dpages, child first.  A dentry is stored
 * only when its count is positive and the optional @test accepts it.
 *
 * The whole climb runs with rename_lock write-held; each candidate is
 * examined under its own d_lock, hence the GFP_ATOMIC allocations.
 *
 * Returns 0 or the first error from au_dpages_append().
 */
int au_dcsub_pages_rev(struct au_dcsub_pages *dpages, struct dentry *dentry,
int do_include, au_dpages_test test, void *arg)
{
int err;
err = 0;
write_seqlock(&rename_lock);
spin_lock(&dentry->d_lock);
if (do_include
&& au_dcount(dentry) > 0
&& (!test || test(dentry, arg)))
err = au_dpages_append(dpages, dentry, GFP_ATOMIC);
spin_unlock(&dentry->d_lock);
if (unlikely(err))
goto out;
/*
* RCU for vfsmount is unnecessary since this is a traverse in a single
* mount
*/
while (!IS_ROOT(dentry)) {
dentry = dentry->d_parent; /* rename_lock is locked */
spin_lock(&dentry->d_lock);
if (au_dcount(dentry) > 0
&& (!test || test(dentry, arg)))
err = au_dpages_append(dpages, dentry, GFP_ATOMIC);
spin_unlock(&dentry->d_lock);
if (unlikely(err))
break;
}
out:
write_sequnlock(&rename_lock);
return err;
}
/*
 * Filter for au_dcsub_pages_rev(): accept a dentry that has aufs dentry
 * info and whose superblock is the one passed in @arg.
 */
static inline int au_dcsub_dpages_aufs(struct dentry *dentry, void *arg)
{
	if (!au_di(dentry))
		return 0;

	return dentry->d_sb == arg;
}
/*
 * au_dcsub_pages_rev() restricted to dentries that carry aufs dentry info
 * and live on the same superblock as @dentry.
 */
int au_dcsub_pages_rev_aufs(struct au_dcsub_pages *dpages,
struct dentry *dentry, int do_include)
{
return au_dcsub_pages_rev(dpages, dentry, do_include,
au_dcsub_dpages_aufs, dentry->d_sb);
}
int au_test_subdir(struct dentry *d1, struct dentry *d2)
{
struct path path[2] = {
{
.dentry = d1
},
{
.dentry = d2
}
};
return path_is_under(path + 0, path + 1);
}

137
fs/aufs/dcsub.h Normal file
View File

@ -0,0 +1,137 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2005-2020 Junjiro R. Okajima
*
* This program, aufs is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* sub-routines for dentry cache
*/
#ifndef __AUFS_DCSUB_H__
#define __AUFS_DCSUB_H__
#ifdef __KERNEL__
#include <linux/dcache.h>
#include <linux/fs.h>
/* One page worth of collected dentry pointers. */
struct au_dpage {
int ndentry; /* number of slots in use */
struct dentry **dentries; /* page-sized array of referenced dentries */
};
/* Growable list of au_dpage, filled by the au_dcsub_pages*() collectors. */
struct au_dcsub_pages {
int ndpage; /* number of valid elements in dpages[] */
struct au_dpage *dpages;
};
/* ---------------------------------------------------------------------- */
/* dcsub.c */
/* set up / tear down a collection */
int au_dpages_init(struct au_dcsub_pages *dpages, gfp_t gfp);
void au_dpages_free(struct au_dcsub_pages *dpages);
/* optional filter: return non-zero to have the dentry collected */
typedef int (*au_dpages_test)(struct dentry *dentry, void *arg);
/* collect the descendants of 'root' */
int au_dcsub_pages(struct au_dcsub_pages *dpages, struct dentry *root,
au_dpages_test test, void *arg);
/* collect 'dentry' (if do_include) and its ancestors, child first */
int au_dcsub_pages_rev(struct au_dcsub_pages *dpages, struct dentry *dentry,
int do_include, au_dpages_test test, void *arg);
int au_dcsub_pages_rev_aufs(struct au_dcsub_pages *dpages,
struct dentry *dentry, int do_include);
int au_test_subdir(struct dentry *d1, struct dentry *d2);
/* ---------------------------------------------------------------------- */
/*
* todo: in linux-3.13, several similar (but faster) helpers are added to
* include/linux/dcache.h. Try them (in the future).
*/
/*
 * Returns 0 when @d is hashed, positive and its inode still has links;
 * -ENOENT otherwise.
 */
static inline int au_d_hashed_positive(struct dentry *d)
{
	struct inode *inode = d_inode(d);

	/* the inode is dereferenced only after the dentry proved positive */
	if (unlikely(d_unhashed(d) || d_is_negative(d) || !inode->i_nlink))
		return -ENOENT;

	return 0;
}
/*
 * Like au_d_hashed_positive(), but a positive dentry whose inode has
 * I_LINKABLE set is accepted as well.  Returns 0 or -ENOENT.
 */
static inline int au_d_linkable(struct dentry *d)
{
	struct inode *inode = d_inode(d);
	int err = au_d_hashed_positive(d);

	if (!err)
		return 0;
	if (d_is_positive(d) && (inode->i_state & I_LINKABLE))
		return 0;

	return err;
}
/*
 * Liveness test for a dentry.  A non-root dentry must pass
 * au_d_hashed_positive(); a root dentry must be neither unlinked nor
 * negative and its inode must still have links.  Returns 0 or -ENOENT.
 */
static inline int au_d_alive(struct dentry *d)
{
	struct inode *inode;

	if (!IS_ROOT(d))
		return au_d_hashed_positive(d);

	inode = d_inode(d);
	if (unlikely(d_unlinked(d) || d_is_negative(d) || !inode->i_nlink))
		return -ENOENT;

	return 0;
}
/*
 * Returns 0 when @d passes au_d_alive() and its inode is not marked
 * IS_DEADDIR; -ENOENT otherwise.
 */
static inline int au_alive_dir(struct dentry *d)
{
	/* IS_DEADDIR() is evaluated only when the dentry is alive */
	if (unlikely(au_d_alive(d) || IS_DEADDIR(d_inode(d))))
		return -ENOENT;

	return 0;
}
/* Returns non-zero when @a and @b have the same length and the same bytes. */
static inline int au_qstreq(struct qstr *a, struct qstr *b)
{
	if (a->len != b->len)
		return 0;

	return !memcmp(a->name, b->name, a->len);
}
/*
 * Signed view of the dentry reference count.
 *
 * Since commit
 *	360f547 2015-01-25 dcache: let the dentry count go down to zero without
 *		taking d_lock
 * d_lockref.count is an int, yet the inline d_count() still returns
 * unsigned int (perhaps for the sake of its existing users).  au_dcount()
 * therefore casts the value back to int so that callers can test for
 * "<= 0".
 */
static inline int au_dcount(struct dentry *d)
{
return (int)d_count(d);
}
#endif /* __KERNEL__ */
#endif /* __AUFS_DCSUB_H__ */

441
fs/aufs/debug.c Normal file
View File

@ -0,0 +1,441 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2005-2020 Junjiro R. Okajima
*
* This program, aufs is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* debug print functions
*/
#include <linux/iversion.h>
#include "aufs.h"
/*
 * 'set' handler of the debug parameter.
 *
 * @val is parsed as an int (base auto-detected).  A positive value switches
 * debugging on through au_debug_on(), anything else calls au_debug_off().
 * Returns 0, or the -errno from kstrtoint().  kp->arg is not used here.
 */
static int param_atomic_t_set(const char *val, const struct kernel_param *kp)
{
	int n;
	int err = kstrtoint(val, 0, &n);

	if (err)
		return err;

	if (n > 0)
		au_debug_on();
	else
		au_debug_off();

	return 0;
}
/*
 * 'get' handler of the debug parameter: print the atomic_t behind kp->arg
 * as a decimal number.  Returns the length written.  Buffer is 4k, so the
 * output must stay short.
 */
static int param_atomic_t_get(char *buffer, const struct kernel_param *kp)
{
	atomic_t *counter = kp->arg;
	int value = atomic_read(counter);

	return sprintf(buffer, "%d", value);
}
/*
 * Parameter operations for an atomic_t module parameter.
 * NOTE(review): the name presumably has to be param_ops_<type> so that
 * module_param_named(..., atomic_t, ...) can find it -- confirm against
 * linux/moduleparam.h.
 */
static struct kernel_param_ops param_ops_atomic_t = {
.set = param_atomic_t_set,
.get = param_atomic_t_get
/* void (*free)(void *arg) */
};
/* debug switch, exposed as module parameter "debug"; positive means on */
atomic_t aufs_debug = ATOMIC_INIT(0);
MODULE_PARM_DESC(debug, "debug print");
module_param_named(debug, aufs_debug, atomic_t, 0664);
DEFINE_MUTEX(au_dbg_mtx); /* just to serialize the dbg msgs */
/* printk level prefix used by dpri() */
char *au_plevel = KERN_DEBUG;
/*
 * Print helper of the dump routines in this file.  The message is emitted,
 * prefixed with au_plevel, when au_plevel is set to something other than
 * KERN_DEBUG, or when au_debug_test() is true.
 */
#define dpri(fmt, ...) do { \
if ((au_plevel \
&& strcmp(au_plevel, KERN_DEBUG)) \
|| au_debug_test()) \
printk("%s" fmt, au_plevel, ##__VA_ARGS__); \
} while (0)
/* ---------------------------------------------------------------------- */
void au_dpri_whlist(struct au_nhash *whlist)
{
unsigned long ul, n;
struct hlist_head *head;
struct au_vdir_wh *pos;
n = whlist->nh_num;
head = whlist->nh_head;
for (ul = 0; ul < n; ul++) {
hlist_for_each_entry(pos, head, wh_hash)
dpri("b%d, %.*s, %d\n",
pos->wh_bindex,
pos->wh_str.len, pos->wh_str.name,
pos->wh_str.len);
head++;
}
}
void au_dpri_vdir(struct au_vdir *vdir)
{
unsigned long ul;
union au_vdir_deblk_p p;
unsigned char *o;
if (!vdir || IS_ERR(vdir)) {
dpri("err %ld\n", PTR_ERR(vdir));
return;
}
dpri("deblk %u, nblk %lu, deblk %p, last{%lu, %p}, ver %llu\n",
vdir->vd_deblk_sz, vdir->vd_nblk, vdir->vd_deblk,
vdir->vd_last.ul, vdir->vd_last.p.deblk, vdir->vd_version);
for (ul = 0; ul < vdir->vd_nblk; ul++) {
p.deblk = vdir->vd_deblk[ul];
o = p.deblk;
dpri("[%lu]: %p\n", ul, o);
}
}
/*
 * Print one line describing @inode, tagged with branch index @bindex.
 * @hn is printed as-is ("hn"); when @wh is given, its name is appended
 * after ", wh ".
 *
 * Returns 0, or -1 when @inode is NULL or an error pointer (an "err" line
 * is printed instead).
 */
static int do_pri_inode(aufs_bindex_t bindex, struct inode *inode, int hn,
struct dentry *wh)
{
char *n = NULL;
int l = 0;
if (!inode || IS_ERR(inode)) {
dpri("i%d: err %ld\n", bindex, PTR_ERR(inode));
return -1;
}
/* the type of i_blocks depends upon CONFIG_LBDAF */
BUILD_BUG_ON(sizeof(inode->i_blocks) != sizeof(unsigned long)
&& sizeof(inode->i_blocks) != sizeof(u64));
if (wh) {
n = (void *)wh->d_name.name;
l = wh->d_name.len;
}
/* only the low 16 bits of the ctime (in ns) are shown */
dpri("i%d: %p, i%lu, %s, cnt %d, nl %u, 0%o, sz %llu, blk %llu,"
" hn %d, ct %lld, np %lu, st 0x%lx, f 0x%x, v %llu, g %x%s%.*s\n",
bindex, inode,
inode->i_ino, inode->i_sb ? au_sbtype(inode->i_sb) : "??",
atomic_read(&inode->i_count), inode->i_nlink, inode->i_mode,
i_size_read(inode), (unsigned long long)inode->i_blocks,
hn, (long long)timespec64_to_ns(&inode->i_ctime) & 0x0ffff,
inode->i_mapping ? inode->i_mapping->nrpages : 0,
inode->i_state, inode->i_flags, inode_peek_iversion(inode),
inode->i_generation,
l ? ", wh " : "", l, n);
return 0;
}
/*
 * Dump @inode and, when it is a sane aufs inode, its branch range and the
 * inode recorded for every branch index in that range.
 */
void au_dpri_inode(struct inode *inode)
{
	struct au_iinfo *iinfo;
	struct au_hinode *hinode;
	aufs_bindex_t b;
	int hn;

	if (do_pri_inode(-1, inode, -1, NULL))
		return;
	if (!au_test_aufs(inode->i_sb) || au_is_bad_inode(inode))
		return;

	iinfo = au_ii(inode);
	dpri("i-1: btop %d, bbot %d, gen %d\n",
	     iinfo->ii_btop, iinfo->ii_bbot, au_iigen(inode, NULL));
	if (iinfo->ii_btop < 0)
		return;

	b = iinfo->ii_btop;
	while (b <= iinfo->ii_bbot) {
		hinode = au_hinode(iinfo, b);
		hn = !!au_hn(hinode);
		do_pri_inode(b, hinode->hi_inode, hn, hinode->hi_whdentry);
		b++;
	}
}
/* Dump every dentry aliasing @inode; the alias list is walked under i_lock. */
void au_dpri_dalias(struct inode *inode)
{
struct dentry *d;
spin_lock(&inode->i_lock);
hlist_for_each_entry(d, &inode->i_dentry, d_u.d_alias)
au_dpri_dentry(d);
spin_unlock(&inode->i_lock);
}
/*
 * Print one line describing @dentry, tagged with @bindex, followed by the
 * line for its inode (do_pri_inode() reports "err" for a negative dentry).
 *
 * Returns 0, or -1 when @dentry is NULL or an error pointer.
 */
static int do_pri_dentry(aufs_bindex_t bindex, struct dentry *dentry)
{
struct dentry *wh = NULL;
int hn;
struct inode *inode;
struct au_iinfo *iinfo;
struct au_hinode *hi;
if (!dentry || IS_ERR(dentry)) {
dpri("d%d: err %ld\n", bindex, PTR_ERR(dentry));
return -1;
}
/* do not call dget_parent() here */
/* note: access d_xxx without d_lock */
dpri("d%d: %p, %pd2?, %s, cnt %d, flags 0x%x, %shashed\n",
bindex, dentry, dentry,
dentry->d_sb ? au_sbtype(dentry->d_sb) : "??",
au_dcount(dentry), dentry->d_flags,
d_unhashed(dentry) ? "un" : "");
hn = -1;
inode = NULL;
if (d_is_positive(dentry))
inode = d_inode(dentry);
/*
 * hn and the whiteout dentry are looked up only for a sane aufs inode and
 * a real (non-negative) branch index; otherwise hn stays -1 and wh NULL.
 */
if (inode
&& au_test_aufs(dentry->d_sb)
&& bindex >= 0
&& !au_is_bad_inode(inode)) {
iinfo = au_ii(inode);
hi = au_hinode(iinfo, bindex);
hn = !!au_hn(hi);
wh = hi->hi_whdentry;
}
do_pri_inode(bindex, inode, hn, wh);
return 0;
}
/*
 * Dump @dentry and, when it is an aufs dentry with dentry info, its branch
 * range summary plus the dentry recorded for every branch index in that
 * range.
 */
void au_dpri_dentry(struct dentry *dentry)
{
	struct au_dinfo *dinfo;
	aufs_bindex_t b;

	if (do_pri_dentry(-1, dentry))
		return;
	if (!au_test_aufs(dentry->d_sb))
		return;

	dinfo = au_di(dentry);
	if (!dinfo)
		return;

	dpri("d-1: btop %d, bbot %d, bwh %d, bdiropq %d, gen %d, tmp %d\n",
	     dinfo->di_btop, dinfo->di_bbot,
	     dinfo->di_bwh, dinfo->di_bdiropq, au_digen(dentry),
	     dinfo->di_tmpfile);
	if (dinfo->di_btop < 0)
		return;

	b = dinfo->di_btop;
	while (b <= dinfo->di_bbot) {
		do_pri_dentry(b, au_hdentry(dinfo, b)->hd_dentry);
		b++;
	}
}
/*
 * Print one line describing @file, tagged with @bindex, followed by the
 * dump of its dentry when that pointer is usable.
 *
 * Returns 0, or -1 when @file is NULL or an error pointer.
 */
static int do_pri_file(aufs_bindex_t bindex, struct file *file)
{
char a[32];
if (!file || IS_ERR(file)) {
dpri("f%d: err %ld\n", bindex, PTR_ERR(file));
return -1;
}
a[0] = 0;
/* generation and mmapped are appended only for an aufs file (bindex < 0) */
if (bindex < 0
&& !IS_ERR_OR_NULL(file->f_path.dentry)
&& au_test_aufs(file->f_path.dentry->d_sb)
&& au_fi(file))
snprintf(a, sizeof(a), ", gen %d, mmapped %d",
au_figen(file), atomic_read(&au_fi(file)->fi_mmapped));
dpri("f%d: mode 0x%x, flags 0%o, cnt %ld, v %llu, pos %llu%s\n",
bindex, file->f_mode, file->f_flags, (long)file_count(file),
file->f_version, file->f_pos, a);
if (!IS_ERR_OR_NULL(file->f_path.dentry))
do_pri_dentry(bindex, file->f_path.dentry);
return 0;
}
/*
 * Dump @file and, when it is an aufs file with file info, the file(s)
 * recorded in that info: the single top file when there is no fi_hdir,
 * otherwise one per branch index from fi_btop to fd_bbot.
 */
void au_dpri_file(struct file *file)
{
	struct au_finfo *finfo;
	struct au_fidir *fidir;
	struct au_hfile *hfile;
	aufs_bindex_t b;

	if (do_pri_file(-1, file))
		return;
	if (IS_ERR_OR_NULL(file->f_path.dentry)
	    || !au_test_aufs(file->f_path.dentry->d_sb))
		return;

	finfo = au_fi(file);
	if (!finfo)
		return;
	if (finfo->fi_btop < 0)
		return;

	fidir = finfo->fi_hdir;
	if (!fidir) {
		do_pri_file(finfo->fi_btop, finfo->fi_htop.hf_file);
		return;
	}

	b = finfo->fi_btop;
	while (b >= 0 && b <= fidir->fd_bbot) {
		hfile = fidir->fd_hfile + b;
		do_pri_file(b, hfile ? hfile->hf_file : NULL);
		b++;
	}
}
/*
 * Print one line describing branch @br and the superblock it is mounted
 * from, tagged with @bindex.
 *
 * Returns 0, or -1 when the branch, its mount or that mount's superblock is
 * NULL or an error pointer.  Note that the "err" line always prints
 * PTR_ERR(br), even when it was the mount or superblock that was rejected.
 */
static int do_pri_br(aufs_bindex_t bindex, struct au_branch *br)
{
struct vfsmount *mnt;
struct super_block *sb;
if (!br || IS_ERR(br))
goto out;
mnt = au_br_mnt(br);
if (!mnt || IS_ERR(mnt))
goto out;
sb = mnt->mnt_sb;
if (!sb || IS_ERR(sb))
goto out;
dpri("s%d: {perm 0x%x, id %d, wbr %p}, "
"%s, dev 0x%02x%02x, flags 0x%lx, cnt %d, active %d, "
"xino %d\n",
bindex, br->br_perm, br->br_id, br->br_wbr,
au_sbtype(sb), MAJOR(sb->s_dev), MINOR(sb->s_dev),
sb->s_flags, sb->s_count,
atomic_read(&sb->s_active),
!!au_xino_file(br->br_xino, /*idx*/-1));
return 0;
out:
dpri("s%d: err %ld\n", bindex, PTR_ERR(br));
return -1;
}
/*
 * Dump superblock @sb and, for an aufs superblock with sbinfo, a summary
 * line plus every branch from index 0 to si_bbot.
 *
 * @sb itself is printed by wrapping it in a temporary, zeroed fake
 * branch/vfsmount pair so that do_pri_br() can be reused.
 * (presumably au_br_mnt() returns br_path.mnt -- verify.)
 */
void au_dpri_sb(struct super_block *sb)
{
struct au_sbinfo *sbinfo;
aufs_bindex_t bindex;
int err;
/* to reduce stack size */
struct {
struct vfsmount mnt;
struct au_branch fake;
} *a;
/* this function can be called from magic sysrq */
a = kzalloc(sizeof(*a), GFP_ATOMIC);
if (unlikely(!a)) {
dpri("no memory\n");
return;
}
a->mnt.mnt_sb = sb;
a->fake.br_path.mnt = &a->mnt;
err = do_pri_br(-1, &a->fake);
au_kfree_rcu(a);
dpri("dev 0x%x\n", sb->s_dev);
if (err || !au_test_aufs(sb))
return;
sbinfo = au_sbi(sb);
if (!sbinfo)
return;
dpri("nw %d, gen %u, kobj %d\n",
atomic_read(&sbinfo->si_nowait.nw_len), sbinfo->si_generation,
kref_read(&sbinfo->si_kobj.kref));
for (bindex = 0; bindex <= sbinfo->si_bbot; bindex++)
do_pri_br(bindex, sbinfo->si_branch[0 + bindex]);
}
/* ---------------------------------------------------------------------- */
/*
 * Consistency check between a dentry and its inode: for every branch index
 * inside both the dentry's and the inode's branch range, the inode recorded
 * for that branch must be the inode of the dentry recorded for it.
 *
 * On a mismatch the dentry and inode are dumped (debug output is switched
 * on around the dump) and the kernel BUG()s.  @func and @line identify the
 * caller in the message.  A negative @dentry is skipped.
 */
void __au_dbg_verify_dinode(struct dentry *dentry, const char *func, int line)
{
struct inode *h_inode, *inode = d_inode(dentry);
struct dentry *h_dentry;
aufs_bindex_t bindex, bbot, bi;
if (!inode /* || au_di(dentry)->di_lsc == AuLsc_DI_TMP */)
return;
/* bbot = min(dentry bottom, inode bottom) */
bbot = au_dbbot(dentry);
bi = au_ibbot(inode);
if (bi < bbot)
bbot = bi;
/* bindex = max(dentry top, inode top) */
bindex = au_dbtop(dentry);
bi = au_ibtop(inode);
if (bi > bindex)
bindex = bi;
for (; bindex <= bbot; bindex++) {
h_dentry = au_h_dptr(dentry, bindex);
if (!h_dentry)
continue;
h_inode = au_h_iptr(inode, bindex);
if (unlikely(h_inode != d_inode(h_dentry))) {
au_debug_on();
AuDbg("b%d, %s:%d\n", bindex, func, line);
AuDbgDentry(dentry);
AuDbgInode(inode);
au_debug_off();
BUG();
}
}
}
/*
 * Assert that @parent and all of its aufs ancestors pass
 * au_digen_test(..., @sigen), i.e. that the test returns zero for each.
 * The dentries are gathered with au_dcsub_pages_rev_aufs() and checked in
 * reverse collection order.  Any failure, including a failed collection,
 * trips AuDebugOn().
 */
void au_dbg_verify_gen(struct dentry *parent, unsigned int sigen)
{
int err, i, j;
struct au_dcsub_pages dpages;
struct au_dpage *dpage;
struct dentry **dentries;
err = au_dpages_init(&dpages, GFP_NOFS);
AuDebugOn(err);
err = au_dcsub_pages_rev_aufs(&dpages, parent, /*do_include*/1);
AuDebugOn(err);
for (i = dpages.ndpage - 1; !err && i >= 0; i--) {
dpage = dpages.dpages + i;
dentries = dpage->dentries;
for (j = dpage->ndentry - 1; !err && j >= 0; j--)
AuDebugOn(au_digen_test(dentries[j], sigen));
}
au_dpages_free(&dpages);
}
/*
 * When au_wkq_test() is true for the current context, report it through
 * au_dbg_blocked().
 */
void au_dbg_verify_kthread(void)
{
if (au_wkq_test()) {
au_dbg_blocked();
/*
* It may be recursive, but udba=notify between two aufs mounts,
* where a single ro branch is shared, is not a problem.
*/
/* WARN_ON(1); */
}
}
/* ---------------------------------------------------------------------- */
/*
 * Start-up sanity checks on two types: aufs_bindex_t must be able to hold
 * -1, and au_vdir_destr.len must not compare below NAME_MAX after being
 * assigned -1 (which holds for an unsigned field that is wide enough).
 * Also warns when CONFIG_4KSTACKS is set.  Always returns 0.
 */
int __init au_debug_init(void)
{
aufs_bindex_t bindex;
struct au_vdir_destr destr;
bindex = -1;
AuDebugOn(bindex >= 0);
destr.len = -1;
AuDebugOn(destr.len < NAME_MAX);
#ifdef CONFIG_4KSTACKS
pr_warn("CONFIG_4KSTACKS is defined.\n");
#endif
return 0;
}

226
fs/aufs/debug.h Normal file
View File

@ -0,0 +1,226 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2005-2020 Junjiro R. Okajima
*
* This program, aufs is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* debug print functions
*/
#ifndef __AUFS_DEBUG_H__
#define __AUFS_DEBUG_H__
#ifdef __KERNEL__
#include <linux/atomic.h>
#include <linux/module.h>
#include <linux/kallsyms.h>
#include <linux/sysrq.h>
/*
 * Debug switch.  With CONFIG_AUFS_DEBUG, AuDebugOn() is BUG_ON() and the
 * switch is the atomic counter aufs_debug: "on" increments it, "off"
 * decrements it unless that would make it negative, and the test is
 * "counter > 0".  Without the option everything compiles to nothing.
 */
#ifdef CONFIG_AUFS_DEBUG
#define AuDebugOn(a) BUG_ON(a)
/* module parameter */
extern atomic_t aufs_debug;
static inline void au_debug_on(void)
{
atomic_inc(&aufs_debug);
}
static inline void au_debug_off(void)
{
atomic_dec_if_positive(&aufs_debug);
}
static inline int au_debug_test(void)
{
return atomic_read(&aufs_debug) > 0;
}
#else
#define AuDebugOn(a) do {} while (0)
AuStubVoid(au_debug_on, void)
AuStubVoid(au_debug_off, void)
AuStubInt0(au_debug_test, void)
#endif /* CONFIG_AUFS_DEBUG */
/* type check hook for module parameters declared with type atomic_t */
#define param_check_atomic_t(name, p) __param_check(name, p, atomic_t)
/* ---------------------------------------------------------------------- */
/* debug print */
#define AuDbg(fmt, ...) do { \
if (au_debug_test()) \
pr_debug("DEBUG: " fmt, ##__VA_ARGS__); \
} while (0)
#define AuLabel(l) AuDbg(#l "\n")
#define AuIOErr(fmt, ...) pr_err("I/O Error, " fmt, ##__VA_ARGS__)
#define AuWarn1(fmt, ...) do { \
static unsigned char _c; \
if (!_c++) \
pr_warn(fmt, ##__VA_ARGS__); \
} while (0)
#define AuErr1(fmt, ...) do { \
static unsigned char _c; \
if (!_c++) \
pr_err(fmt, ##__VA_ARGS__); \
} while (0)
#define AuIOErr1(fmt, ...) do { \
static unsigned char _c; \
if (!_c++) \
AuIOErr(fmt, ##__VA_ARGS__); \
} while (0)
#define AuUnsupportMsg "This operation is not supported." \
" Please report this application to aufs-users ML."
#define AuUnsupport(fmt, ...) do { \
pr_err(AuUnsupportMsg "\n" fmt, ##__VA_ARGS__); \
dump_stack(); \
} while (0)
#define AuTraceErr(e) do { \
if (unlikely((e) < 0)) \
AuDbg("err %d\n", (int)(e)); \
} while (0)
#define AuTraceErrPtr(p) do { \
if (IS_ERR(p)) \
AuDbg("err %ld\n", PTR_ERR(p)); \
} while (0)
/* dirty macros for debug print, use with "%.*s" and caution */
#define AuLNPair(qstr) (qstr)->len, (qstr)->name
/* ---------------------------------------------------------------------- */
struct dentry;
#ifdef CONFIG_AUFS_DEBUG
/* debug.c */
extern struct mutex au_dbg_mtx; /* serializes the AuDbg*() dumps below */
extern char *au_plevel; /* printk level prefix used by the dumps */
/* dump routines, one per object type */
struct au_nhash;
void au_dpri_whlist(struct au_nhash *whlist);
struct au_vdir;
void au_dpri_vdir(struct au_vdir *vdir);
struct inode;
void au_dpri_inode(struct inode *inode);
void au_dpri_dalias(struct inode *inode);
void au_dpri_dentry(struct dentry *dentry);
struct file;
void au_dpri_file(struct file *filp);
struct super_block;
void au_dpri_sb(struct super_block *sb);
/* consistency checks; au_dbg_verify_dinode() records the calling site */
#define au_dbg_verify_dinode(d) __au_dbg_verify_dinode(d, __func__, __LINE__)
void __au_dbg_verify_dinode(struct dentry *dentry, const char *func, int line);
void au_dbg_verify_gen(struct dentry *parent, unsigned int sigen);
void au_dbg_verify_kthread(void);
int __init au_debug_init(void);
/*
 * AuDbg<Object>(x): print the argument expression as a label, then dump the
 * object with the matching au_dpri_*() routine, all under au_dbg_mtx so
 * that concurrent dumps do not interleave.  They take a mutex, so they must
 * not be used where sleeping is forbidden.
 */
#define AuDbgWhlist(w) do { \
mutex_lock(&au_dbg_mtx); \
AuDbg(#w "\n"); \
au_dpri_whlist(w); \
mutex_unlock(&au_dbg_mtx); \
} while (0)
#define AuDbgVdir(v) do { \
mutex_lock(&au_dbg_mtx); \
AuDbg(#v "\n"); \
au_dpri_vdir(v); \
mutex_unlock(&au_dbg_mtx); \
} while (0)
#define AuDbgInode(i) do { \
mutex_lock(&au_dbg_mtx); \
AuDbg(#i "\n"); \
au_dpri_inode(i); \
mutex_unlock(&au_dbg_mtx); \
} while (0)
#define AuDbgDAlias(i) do { \
mutex_lock(&au_dbg_mtx); \
AuDbg(#i "\n"); \
au_dpri_dalias(i); \
mutex_unlock(&au_dbg_mtx); \
} while (0)
#define AuDbgDentry(d) do { \
mutex_lock(&au_dbg_mtx); \
AuDbg(#d "\n"); \
au_dpri_dentry(d); \
mutex_unlock(&au_dbg_mtx); \
} while (0)
#define AuDbgFile(f) do { \
mutex_lock(&au_dbg_mtx); \
AuDbg(#f "\n"); \
au_dpri_file(f); \
mutex_unlock(&au_dbg_mtx); \
} while (0)
#define AuDbgSb(sb) do { \
mutex_lock(&au_dbg_mtx); \
AuDbg(#sb "\n"); \
au_dpri_sb(sb); \
mutex_unlock(&au_dbg_mtx); \
} while (0)
/* AuDbgSym(): print the symbol name that sprint_symbol() finds for addr */
#define AuDbgSym(addr) do { \
char sym[KSYM_SYMBOL_LEN]; \
sprint_symbol(sym, (unsigned long)addr); \
AuDbg("%s\n", sym); \
} while (0)
#else
/* !CONFIG_AUFS_DEBUG: checks become stubs and the dump macros expand to nothing */
AuStubVoid(au_dbg_verify_dinode, struct dentry *dentry)
AuStubVoid(au_dbg_verify_gen, struct dentry *parent, unsigned int sigen)
AuStubVoid(au_dbg_verify_kthread, void)
AuStubInt0(__init au_debug_init, void)
#define AuDbgWhlist(w) do {} while (0)
#define AuDbgVdir(v) do {} while (0)
#define AuDbgInode(i) do {} while (0)
#define AuDbgDAlias(i) do {} while (0)
#define AuDbgDentry(d) do {} while (0)
#define AuDbgFile(f) do {} while (0)
#define AuDbgSb(sb) do {} while (0)
#define AuDbgSym(addr) do {} while (0)
#endif /* CONFIG_AUFS_DEBUG */
/* ---------------------------------------------------------------------- */
/*
 * Magic SysRq support.  au_dbg_blocked() does real work only when both
 * CONFIG_AUFS_MAGIC_SYSRQ and CONFIG_HW_CONSOLE are set: it warns and then
 * triggers the 'w' sysrq handler.  In every other configuration it is an
 * empty stub.
 */
#ifdef CONFIG_AUFS_MAGIC_SYSRQ
int __init au_sysrq_init(void);
void au_sysrq_fin(void);
#ifdef CONFIG_HW_CONSOLE
#define au_dbg_blocked() do { \
WARN_ON(1); \
handle_sysrq('w'); \
} while (0)
#else
AuStubVoid(au_dbg_blocked, void)
#endif
#else
AuStubInt0(__init au_sysrq_init, void)
AuStubVoid(au_sysrq_fin, void)
AuStubVoid(au_dbg_blocked, void)
#endif /* CONFIG_AUFS_MAGIC_SYSRQ */
#endif /* __KERNEL__ */
#endif /* __AUFS_DEBUG_H__ */

1154
fs/aufs/dentry.c Normal file

File diff suppressed because it is too large Load Diff

268
fs/aufs/dentry.h Normal file
View File

@ -0,0 +1,268 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2005-2020 Junjiro R. Okajima
*
* This program, aufs is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* lookup and dentry operations
*/
#ifndef __AUFS_DENTRY_H__
#define __AUFS_DENTRY_H__
#ifdef __KERNEL__
#include <linux/dcache.h>
#include "dirren.h"
#include "rwsem.h"
/* One slot of the per-branch dentry array in struct au_dinfo. */
struct au_hdentry {
struct dentry *hd_dentry; /* dentry recorded for this branch index */
aufs_bindex_t hd_id; /* presumably the branch id -- verify in dinfo.c */
};
/* aufs private data of a dentry, reached through dentry->d_fsdata. */
struct au_dinfo {
atomic_t di_generation;
struct au_rwsem di_rwsem;
/*
 * di_btop..di_bbot is the range of valid indices in di_hdentry.
 * NOTE(review): di_bwh and di_bdiropq look like the branch indices of the
 * whiteout and of the opaque-dir mark -- confirm.
 */
aufs_bindex_t di_btop, di_bbot, di_bwh, di_bdiropq;
unsigned char di_tmpfile; /* to allow the different name */
struct au_hdentry *di_hdentry;
struct rcu_head rcu;
} ____cacheline_aligned_in_smp;
/* ---------------------------------------------------------------------- */
/* flags for au_lkup_dentry() */
#define AuLkup_ALLOW_NEG 1
#define AuLkup_IGNORE_PERM (1 << 1)
#define AuLkup_DIRREN (1 << 2)
#define au_ftest_lkup(flags, name) ((flags) & AuLkup_##name)
#define au_fset_lkup(flags, name) \
do { (flags) |= AuLkup_##name; } while (0)
#define au_fclr_lkup(flags, name) \
do { (flags) &= ~AuLkup_##name; } while (0)
#ifndef CONFIG_AUFS_DIRREN
#undef AuLkup_DIRREN
#define AuLkup_DIRREN 0
#endif
/* Argument bundle for the lookup helpers in dentry.c. */
struct au_do_lookup_args {
unsigned int flags; /* AuLkup_* bits */
mode_t type;
/* NOTE(review): whname is presumably the whiteout form of *name -- verify */
struct qstr whname, *name;
struct au_dr_lookup dirren;
};
/* ---------------------------------------------------------------------- */
/* dentry.c */
extern const struct dentry_operations aufs_dop, aufs_dop_noreval;
struct au_branch;
struct dentry *au_sio_lkup_one(struct qstr *name, struct dentry *parent);
int au_h_verify(struct dentry *h_dentry, unsigned int udba, struct inode *h_dir,
struct dentry *h_parent, struct au_branch *br);
int au_lkup_dentry(struct dentry *dentry, aufs_bindex_t btop,
unsigned int flags);
int au_lkup_neg(struct dentry *dentry, aufs_bindex_t bindex, int wh);
int au_refresh_dentry(struct dentry *dentry, struct dentry *parent);
int au_reval_dpath(struct dentry *dentry, unsigned int sigen);
void au_refresh_dop(struct dentry *dentry, int force_reval);
/* dinfo.c */
/* allocation and lifetime of struct au_dinfo */
void au_di_init_once(void *_di);
struct au_dinfo *au_di_alloc(struct super_block *sb, unsigned int lsc);
void au_di_free(struct au_dinfo *dinfo);
void au_di_swap(struct au_dinfo *a, struct au_dinfo *b);
void au_di_cp(struct au_dinfo *dst, struct au_dinfo *src);
int au_di_init(struct dentry *dentry);
void au_di_fin(struct dentry *dentry);
int au_di_realloc(struct au_dinfo *dinfo, int nbr, int may_shrink);
/* dinfo locking; 'lsc' is one of the AuLsc_DI_* lock subclasses */
void di_read_lock(struct dentry *d, int flags, unsigned int lsc);
void di_read_unlock(struct dentry *d, int flags);
void di_downgrade_lock(struct dentry *d, int flags);
void di_write_lock(struct dentry *d, unsigned int lsc);
void di_write_unlock(struct dentry *d);
void di_write_lock2_child(struct dentry *d1, struct dentry *d2, int isdir);
void di_write_lock2_parent(struct dentry *d1, struct dentry *d2, int isdir);
void di_write_unlock2(struct dentry *d1, struct dentry *d2);
/* per-branch dentry accessors and branch range maintenance */
struct dentry *au_h_dptr(struct dentry *dentry, aufs_bindex_t bindex);
struct dentry *au_h_d_alias(struct dentry *dentry, aufs_bindex_t bindex);
aufs_bindex_t au_dbtail(struct dentry *dentry);
aufs_bindex_t au_dbtaildir(struct dentry *dentry);
void au_set_h_dptr(struct dentry *dentry, aufs_bindex_t bindex,
struct dentry *h_dentry);
int au_digen_test(struct dentry *dentry, unsigned int sigen);
int au_dbrange_test(struct dentry *dentry);
void au_update_digen(struct dentry *dentry);
void au_update_dbrange(struct dentry *dentry, int do_put_zero);
void au_update_dbtop(struct dentry *dentry);
void au_update_dbbot(struct dentry *dentry);
int au_find_dbindex(struct dentry *dentry, struct dentry *h_dentry);
/* ---------------------------------------------------------------------- */
/* Return the aufs private data stored in @dentry->d_fsdata. */
static inline struct au_dinfo *au_di(struct dentry *dentry)
{
	struct au_dinfo *dinfo = dentry->d_fsdata;

	return dinfo;
}
/* ---------------------------------------------------------------------- */
/*
 * lock subclass for dinfo
 * These values are handed to di_read_lock()/di_write_lock() as @lsc and end
 * up as the nesting subclass of the di_rwsem acquisition.
 */
enum {
AuLsc_DI_CHILD, /* child first */
AuLsc_DI_CHILD2, /* rename(2), link(2), and cpup at hnotify */
AuLsc_DI_CHILD3, /* copyup dirs */
AuLsc_DI_PARENT,
AuLsc_DI_PARENT2,
AuLsc_DI_PARENT3,
AuLsc_DI_TMP /* temp for replacing dinfo */
};
/*
 * Generated wrappers, one read and one write variant per lock subclass:
 * di_read_lock_child, di_write_lock_child,
 * di_read_lock_child2, di_write_lock_child2,
 * di_read_lock_child3, di_write_lock_child3,
 * di_read_lock_parent, di_write_lock_parent,
 * di_read_lock_parent2, di_write_lock_parent2,
 * di_read_lock_parent3, di_write_lock_parent3,
 *
 * Each wrapper just forwards to di_read_lock()/di_write_lock() with the
 * matching AuLsc_DI_* constant. No wrapper is generated for AuLsc_DI_TMP.
 */
#define AuReadLockFunc(name, lsc) \
static inline void di_read_lock_##name(struct dentry *d, int flags) \
{ di_read_lock(d, flags, AuLsc_DI_##lsc); }
#define AuWriteLockFunc(name, lsc) \
static inline void di_write_lock_##name(struct dentry *d) \
{ di_write_lock(d, AuLsc_DI_##lsc); }
#define AuRWLockFuncs(name, lsc) \
AuReadLockFunc(name, lsc) \
AuWriteLockFunc(name, lsc)
AuRWLockFuncs(child, CHILD);
AuRWLockFuncs(child2, CHILD2);
AuRWLockFuncs(child3, CHILD3);
AuRWLockFuncs(parent, PARENT);
AuRWLockFuncs(parent2, PARENT2);
AuRWLockFuncs(parent3, PARENT3);
/* the generator macros are private to this header */
#undef AuReadLockFunc
#undef AuWriteLockFunc
#undef AuRWLockFuncs
/* lock-state assertions on the dentry's di_rwsem, forwarded to AuRwMust*() */
#define DiMustNoWaiters(d) AuRwMustNoWaiters(&au_di(d)->di_rwsem)
#define DiMustAnyLock(d) AuRwMustAnyLock(&au_di(d)->di_rwsem)
#define DiMustWriteLock(d) AuRwMustWriteLock(&au_di(d)->di_rwsem)
/* ---------------------------------------------------------------------- */
/* todo: memory barrier? */
static inline unsigned int au_digen(struct dentry *d)
{
return atomic_read(&au_di(d)->di_generation);
}
/* Mark the slot @hdentry as holding no lower dentry. */
static inline void au_h_dentry_init(struct au_hdentry *hdentry)
{
hdentry->hd_dentry = NULL;
}
/* Return the address of slot @bindex in @di's lower-dentry array. */
static inline struct au_hdentry *au_hdentry(struct au_dinfo *di,
					    aufs_bindex_t bindex)
{
	return &di->di_hdentry[bindex];
}
/* Drop the reference on the lower dentry held by slot @hd; @hd may be NULL. */
static inline void au_hdput(struct au_hdentry *hd)
{
	if (!hd)
		return;
	dput(hd->hd_dentry);
}
static inline aufs_bindex_t au_dbtop(struct dentry *dentry)
{
DiMustAnyLock(dentry);
return au_di(dentry)->di_btop;
}
static inline aufs_bindex_t au_dbbot(struct dentry *dentry)
{
DiMustAnyLock(dentry);
return au_di(dentry)->di_bbot;
}
static inline aufs_bindex_t au_dbwh(struct dentry *dentry)
{
DiMustAnyLock(dentry);
return au_di(dentry)->di_bwh;
}
static inline aufs_bindex_t au_dbdiropq(struct dentry *dentry)
{
DiMustAnyLock(dentry);
return au_di(dentry)->di_bdiropq;
}
/* todo: hard/soft set? */
/*
 * Writers for the branch indices kept in the dinfo of @dentry.
 * All of them assert that the dinfo is write-locked.
 */

/* Store @bindex into di_btop. */
static inline void au_set_dbtop(struct dentry *dentry, aufs_bindex_t bindex)
{
	struct au_dinfo *dinfo;

	DiMustWriteLock(dentry);
	dinfo = au_di(dentry);
	dinfo->di_btop = bindex;
}

/* Store @bindex into di_bbot. */
static inline void au_set_dbbot(struct dentry *dentry, aufs_bindex_t bindex)
{
	struct au_dinfo *dinfo;

	DiMustWriteLock(dentry);
	dinfo = au_di(dentry);
	dinfo->di_bbot = bindex;
}

/* Store @bindex into di_bwh. */
static inline void au_set_dbwh(struct dentry *dentry, aufs_bindex_t bindex)
{
	struct au_dinfo *dinfo;

	DiMustWriteLock(dentry);
	/* dbwh can be outside of btop - bbot range */
	dinfo = au_di(dentry);
	dinfo->di_bwh = bindex;
}

/* Store @bindex into di_bdiropq. */
static inline void au_set_dbdiropq(struct dentry *dentry, aufs_bindex_t bindex)
{
	struct au_dinfo *dinfo;

	DiMustWriteLock(dentry);
	dinfo = au_di(dentry);
	dinfo->di_bdiropq = bindex;
}
/* ---------------------------------------------------------------------- */
/* helpers that exist in full only when hnotify support is configured */
#ifdef CONFIG_AUFS_HNOTIFY
/* Decrement the dentry's generation number by one. */
static inline void au_digen_dec(struct dentry *d)
{
atomic_dec(&au_di(d)->di_generation);
}
/*
 * Detach the private data pointer from @dentry.
 * Nothing is freed here; the caller keeps responsibility for the old dinfo.
 */
static inline void au_hn_di_reinit(struct dentry *dentry)
{
dentry->d_fsdata = NULL;
}
#else
/*
 * NOTE(review): AuStubVoid presumably expands to an empty inline
 * au_hn_di_reinit(); au_digen_dec() has no stub in this branch.
 */
AuStubVoid(au_hn_di_reinit, struct dentry *dentry __maybe_unused)
#endif /* CONFIG_AUFS_HNOTIFY */
#endif /* __KERNEL__ */
#endif /* __AUFS_DENTRY_H__ */

554
fs/aufs/dinfo.c Normal file
View File

@ -0,0 +1,554 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2005-2020 Junjiro R. Okajima
*
* This program, aufs is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* dentry private data
*/
#include "aufs.h"
/*
 * Initialise the rw-semaphore of the dinfo object at @_dinfo.
 * NOTE(review): the name and the void * argument suggest a slab constructor;
 * confirm at the cache creation site.
 */
void au_di_init_once(void *_dinfo)
{
	struct au_dinfo *di = _dinfo;

	au_rw_init(&di->di_rwsem);
}
/*
 * Allocate a dinfo with one lower-dentry slot per branch of @sb (at least
 * one slot).
 * On success the dinfo is returned write-locked with subclass @lsc, all
 * branch indices set to -1 and every slot id set to -1.
 * Returns NULL when either allocation fails.
 */
struct au_dinfo *au_di_alloc(struct super_block *sb, unsigned int lsc)
{
	struct au_dinfo *di;
	int nr_slot, idx;

	di = au_cache_alloc_dinfo();
	if (unlikely(!di))
		return NULL;

	nr_slot = au_sbbot(sb) + 1;
	if (nr_slot <= 0)
		nr_slot = 1;
	di->di_hdentry = kcalloc(nr_slot, sizeof(*di->di_hdentry), GFP_NOFS);
	if (!di->di_hdentry) {
		au_cache_free_dinfo(di);
		return NULL;
	}

	au_rw_write_lock_nested(&di->di_rwsem, lsc);
	di->di_btop = -1;
	di->di_bbot = -1;
	di->di_bwh = -1;
	di->di_bdiropq = -1;
	di->di_tmpfile = 0;
	for (idx = 0; idx < nr_slot; idx++)
		di->di_hdentry[idx].hd_id = -1;

	return di;
}
/*
 * Release @dinfo: put every lower dentry in di_btop..di_bbot, then free the
 * slot array and the dinfo itself.
 */
void au_di_free(struct au_dinfo *dinfo)
{
	struct au_hdentry *hd;
	aufs_bindex_t idx, last;

	/* dentry may not be revalidated */
	idx = dinfo->di_btop;
	if (idx >= 0) {
		last = dinfo->di_bbot;
		for (hd = au_hdentry(dinfo, idx); idx <= last; idx++, hd++)
			au_hdput(hd);
	}
	au_kfree_try_rcu(dinfo->di_hdentry);
	au_cache_free_dinfo(dinfo);
}
/*
 * Exchange the slot array and the four branch indices of @a and @b.
 * Both must be write-locked. Other members (the rwsem, the generation,
 * di_tmpfile) are left in place.
 */
void au_di_swap(struct au_dinfo *a, struct au_dinfo *b)
{
	struct au_hdentry *hd_tmp;
	aufs_bindex_t idx_tmp;

	AuRwMustWriteLock(&a->di_rwsem);
	AuRwMustWriteLock(&b->di_rwsem);

#define AuDiExchange(tmp, member)		\
	do {					\
		tmp = a->member;		\
		a->member = b->member;		\
		b->member = tmp;		\
	} while (0)

	AuDiExchange(hd_tmp, di_hdentry);
	AuDiExchange(idx_tmp, di_btop);
	AuDiExchange(idx_tmp, di_bbot);
	AuDiExchange(idx_tmp, di_bwh);
	AuDiExchange(idx_tmp, di_bdiropq);
	/* smp_mb(); */

#undef AuDiExchange
}
/*
 * Copy the four branch indices from @src to @dst. The slot array is not
 * copied. Both dinfo objects must be write-locked.
 */
void au_di_cp(struct au_dinfo *dst, struct au_dinfo *src)
{
	AuRwMustWriteLock(&dst->di_rwsem);
	AuRwMustWriteLock(&src->di_rwsem);

	dst->di_bdiropq = src->di_bdiropq;
	dst->di_bwh = src->di_bwh;
	dst->di_bbot = src->di_bbot;
	dst->di_btop = src->di_btop;
	/* smp_mb(); */
}
/*
 * Allocate the private data for @dentry, stamp it with the superblock's
 * current generation and attach it to d_fsdata.
 * The new dinfo stays write-locked (subclass AuLsc_DI_CHILD) on return.
 * Returns 0 or -ENOMEM.
 */
int au_di_init(struct dentry *dentry)
{
	struct super_block *sb = dentry->d_sb;
	struct au_dinfo *dinfo;

	dinfo = au_di_alloc(sb, AuLsc_DI_CHILD);
	if (!dinfo)
		return -ENOMEM;

	atomic_set(&dinfo->di_generation, au_sigen(sb));
	/* smp_mb(); */ /* atomic_set */
	dentry->d_fsdata = dinfo;
	return 0;
}
/* Tear down the private data of @dentry: destroy its rwsem, then free it. */
void au_di_fin(struct dentry *dentry)
{
	struct au_dinfo *dinfo = au_di(dentry);

	AuRwDestroy(&dinfo->di_rwsem);
	au_di_free(dinfo);
}
/*
 * Resize the slot array of @dinfo to hold @nbr entries.
 * The current size handed to au_kzrealloc() is derived from di_bbot, with
 * one slot as the minimum. @dinfo must be write-locked.
 * Returns 0, or -ENOMEM with the old array left in place.
 */
int au_di_realloc(struct au_dinfo *dinfo, int nbr, int may_shrink)
{
	int cur_sz;
	struct au_hdentry *new_hd;

	AuRwMustWriteLock(&dinfo->di_rwsem);

	cur_sz = sizeof(*new_hd) * (dinfo->di_bbot + 1);
	if (!cur_sz)
		cur_sz = sizeof(*new_hd);
	new_hd = au_kzrealloc(dinfo->di_hdentry, cur_sz,
			      sizeof(*new_hd) * nbr, GFP_NOFS, may_shrink);
	if (!new_hd)
		return -ENOMEM;

	dinfo->di_hdentry = new_hd;
	return 0;
}
/* ---------------------------------------------------------------------- */
/*
 * Write-lock the inode info of @inode using the ii_write_lock_*() variant
 * that corresponds to the dinfo lock subclass @lsc.
 * AuLsc_DI_TMP (or any unknown value) has no inode counterpart and is a BUG.
 */
static void do_ii_write_lock(struct inode *inode, unsigned int lsc)
{
switch (lsc) {
case AuLsc_DI_CHILD:
ii_write_lock_child(inode);
break;
case AuLsc_DI_CHILD2:
ii_write_lock_child2(inode);
break;
case AuLsc_DI_CHILD3:
ii_write_lock_child3(inode);
break;
case AuLsc_DI_PARENT:
ii_write_lock_parent(inode);
break;
case AuLsc_DI_PARENT2:
ii_write_lock_parent2(inode);
break;
case AuLsc_DI_PARENT3:
ii_write_lock_parent3(inode);
break;
default:
BUG();
}
}
/*
 * Read-lock the inode info of @inode using the ii_read_lock_*() variant
 * that corresponds to the dinfo lock subclass @lsc.
 * AuLsc_DI_TMP (or any unknown value) has no inode counterpart and is a BUG.
 */
static void do_ii_read_lock(struct inode *inode, unsigned int lsc)
{
switch (lsc) {
case AuLsc_DI_CHILD:
ii_read_lock_child(inode);
break;
case AuLsc_DI_CHILD2:
ii_read_lock_child2(inode);
break;
case AuLsc_DI_CHILD3:
ii_read_lock_child3(inode);
break;
case AuLsc_DI_PARENT:
ii_read_lock_parent(inode);
break;
case AuLsc_DI_PARENT2:
ii_read_lock_parent2(inode);
break;
case AuLsc_DI_PARENT3:
ii_read_lock_parent3(inode);
break;
default:
BUG();
}
}
/*
 * Read-lock the dinfo of @d with subclass @lsc.
 * For a positive dentry the inode info is locked as well: for writing when
 * @flags has IW, else for reading when it has IR, else not at all.
 */
void di_read_lock(struct dentry *d, int flags, unsigned int lsc)
{
	struct inode *inode;

	au_rw_read_lock_nested(&au_di(d)->di_rwsem, lsc);
	if (!d_really_is_positive(d))
		return;

	inode = d_inode(d);
	if (au_ftest_lock(flags, IW))
		do_ii_write_lock(inode, lsc);
	else if (au_ftest_lock(flags, IR))
		do_ii_read_lock(inode, lsc);
}
/*
 * Counterpart of di_read_lock(): release the inode info lock selected by
 * @flags (IW before IR) for a positive dentry, then drop the dinfo read lock.
 */
void di_read_unlock(struct dentry *d, int flags)
{
	struct inode *inode;

	if (!d_really_is_positive(d))
		goto unlock_di;

	inode = d_inode(d);
	if (au_ftest_lock(flags, IW)) {
		au_dbg_verify_dinode(d);
		ii_write_unlock(inode);
	} else if (au_ftest_lock(flags, IR)) {
		au_dbg_verify_dinode(d);
		ii_read_unlock(inode);
	}

unlock_di:
	au_rw_read_unlock(&au_di(d)->di_rwsem);
}
/*
 * Downgrade the dinfo lock of @d via au_rw_dgrade_lock().
 * When @d is positive and @flags has IR, the inode info lock is downgraded
 * first.
 */
void di_downgrade_lock(struct dentry *d, int flags)
{
	if (d_really_is_positive(d)) {
		if (au_ftest_lock(flags, IR))
			ii_downgrade_lock(d_inode(d));
	}
	au_rw_dgrade_lock(&au_di(d)->di_rwsem);
}
void di_write_lock(struct dentry *d, unsigned int lsc)
{
au_rw_write_lock_nested(&au_di(d)->di_rwsem, lsc);
if (d_really_is_positive(d))
do_ii_write_lock(d_inode(d), lsc);
}
/*
 * Counterpart of di_write_lock(): release the inode info write lock of a
 * positive dentry, then the dinfo write lock.
 */
void di_write_unlock(struct dentry *d)
{
	struct inode *inode;

	au_dbg_verify_dinode(d);
	if (d_really_is_positive(d)) {
		inode = d_inode(d);
		ii_write_unlock(inode);
	}
	au_rw_write_unlock(&au_di(d)->di_rwsem);
}
/*
 * Write-lock two distinct dentries of the same superblock with the CHILD and
 * CHILD2 subclasses.
 * @d1 is taken first when (@isdir && au_test_subdir(d1, d2)) holds or when
 * its address is the lower one; otherwise @d2 goes first.
 */
void di_write_lock2_child(struct dentry *d1, struct dentry *d2, int isdir)
{
	struct dentry *first, *second;

	AuDebugOn(d1 == d2
		  || d_inode(d1) == d_inode(d2)
		  || d1->d_sb != d2->d_sb);

	first = d2;
	second = d1;
	if ((isdir && au_test_subdir(d1, d2))
	    || d1 < d2) {
		first = d1;
		second = d2;
	}
	di_write_lock_child(first);
	di_write_lock_child2(second);
}
/*
 * Write-lock two distinct dentries of the same superblock with the PARENT
 * and PARENT2 subclasses.
 * The ordering rule is the same as in di_write_lock2_child().
 */
void di_write_lock2_parent(struct dentry *d1, struct dentry *d2, int isdir)
{
	struct dentry *first, *second;

	AuDebugOn(d1 == d2
		  || d_inode(d1) == d_inode(d2)
		  || d1->d_sb != d2->d_sb);

	first = d2;
	second = d1;
	if ((isdir && au_test_subdir(d1, d2))
	    || d1 < d2) {
		first = d1;
		second = d2;
	}
	di_write_lock_parent(first);
	di_write_lock_parent2(second);
}
/*
 * Release the write locks on @d1 and @d2.
 * When both dentries share one inode, unlocking @d1 has already released the
 * inode info lock, so only the dinfo rwsem of @d2 is dropped.
 */
void di_write_unlock2(struct dentry *d1, struct dentry *d2)
{
	di_write_unlock(d1);
	if (d_inode(d1) != d_inode(d2)) {
		di_write_unlock(d2);
		return;
	}
	au_rw_write_unlock(&au_di(d2)->di_rwsem);
}
/* ---------------------------------------------------------------------- */
/*
 * Return the lower dentry stored for branch @bindex, without taking a
 * reference.
 * NULL is returned when the dentry has no top branch or @bindex lies above
 * it; the slot itself may also hold NULL. The dinfo must be locked.
 */
struct dentry *au_h_dptr(struct dentry *dentry, aufs_bindex_t bindex)
{
	struct dentry *h_dentry;
	aufs_bindex_t btop;

	DiMustAnyLock(dentry);

	btop = au_dbtop(dentry);
	if (btop < 0 || bindex < btop)
		return NULL;

	AuDebugOn(bindex < 0);
	h_dentry = au_hdentry(au_di(dentry), bindex)->hd_dentry;
	AuDebugOn(h_dentry && au_dcount(h_dentry) <= 0);
	return h_dentry;
}
/*
 * extended version of au_h_dptr().
 * returns a hashed and positive (or linkable) h_dentry in bindex, NULL, or
 * error.
 *
 * Lookup order: the lower dentry recorded in @dentry, then any alias of the
 * lower inode, then (with the PLINK mount option) the pseudo-link entry.
 * A valid dentry is returned with a reference that the caller must dput();
 * candidates which fail the test are put here and never handed back.
 */
struct dentry *au_h_d_alias(struct dentry *dentry, aufs_bindex_t bindex)
{
	struct dentry *h_dentry;
	struct inode *inode, *h_inode;

	AuDebugOn(d_really_is_negative(dentry));

	h_dentry = NULL;
	if (au_dbtop(dentry) <= bindex
	    && bindex <= au_dbbot(dentry))
		h_dentry = au_h_dptr(dentry, bindex);
	if (h_dentry && !au_d_linkable(h_dentry)) {
		/* au_h_dptr() does not take a reference, so take one here */
		dget(h_dentry);
		goto out; /* success */
	}

	inode = d_inode(dentry);
	AuDebugOn(bindex < au_ibtop(inode));
	AuDebugOn(au_ibbot(inode) < bindex);
	h_inode = au_h_iptr(inode, bindex);
	h_dentry = d_find_alias(h_inode);
	if (h_dentry) {
		if (IS_ERR(h_dentry))
			goto out;
		if (!au_d_linkable(h_dentry))
			goto out; /* success */
		dput(h_dentry);
		/*
		 * The reference is gone. Forget the pointer so that it
		 * cannot be returned when the plink lookup below is skipped.
		 */
		h_dentry = NULL;
	}

	if (au_opt_test(au_mntflags(dentry->d_sb), PLINK)) {
		h_dentry = au_plink_lkup(inode, bindex);
		AuDebugOn(!h_dentry);
		if (!IS_ERR(h_dentry)) {
			if (!au_d_hashed_positive(h_dentry))
				goto out; /* success */
			dput(h_dentry);
			h_dentry = NULL;
		}
	}

out:
	AuDbgDentry(h_dentry);
	return h_dentry;
}
/*
 * Return the last branch index to consider for @dentry, taking di_bwh into
 * account:
 * - di_bbot when it is negative,
 * - 0 when di_bwh is 0,
 * - di_bwh - 1 when 0 < di_bwh < di_bbot,
 * - di_bbot otherwise.
 */
aufs_bindex_t au_dbtail(struct dentry *dentry)
{
	aufs_bindex_t bbot, bwh;

	bbot = au_dbbot(dentry);
	if (bbot < 0)
		return bbot;

	bwh = au_dbwh(dentry);
	if (bwh == 0)
		return 0;
	if (bwh > 0 && bwh < bbot)
		return bwh - 1;
	return bbot;
}
/*
 * Directory flavour of au_dbtail(): the result is further limited to
 * di_bdiropq when that index is valid and lies below the tail.
 */
aufs_bindex_t au_dbtaildir(struct dentry *dentry)
{
	aufs_bindex_t tail, bopq;

	tail = au_dbtail(dentry);
	if (tail < 0)
		return tail;

	bopq = au_dbdiropq(dentry);
	return (bopq >= 0 && bopq < tail) ? bopq : tail;
}
/* ---------------------------------------------------------------------- */
/*
 * Install @h_dentry in slot @bindex of @dentry, dropping the reference held
 * by the previous occupant. No reference is taken on @h_dentry here.
 * For a non-NULL @h_dentry the slot also records the id of branch @bindex;
 * clearing a slot leaves hd_id untouched. The dinfo must be write-locked.
 */
void au_set_h_dptr(struct dentry *dentry, aufs_bindex_t bindex,
		   struct dentry *h_dentry)
{
	struct au_hdentry *hd;

	DiMustWriteLock(dentry);

	hd = au_hdentry(au_di(dentry), bindex);
	au_hdput(hd);
	hd->hd_dentry = h_dentry;
	if (!h_dentry)
		return;

	hd->hd_id = au_sbr(dentry->d_sb, bindex)->br_id;
}
/*
 * Check that @dentry has a branch range.
 * Returns 0 when di_btop is non-negative, -EIO otherwise.
 *
 * NOTE(review): with btop >= 0 the debug assertion only fires for bbot < 0,
 * because "btop > bbot" is then implied. A state with 0 <= bbot < btop is
 * not caught; confirm whether "||" was intended.
 */
int au_dbrange_test(struct dentry *dentry)
{
	aufs_bindex_t btop, bbot;

	btop = au_dbtop(dentry);
	bbot = au_dbbot(dentry);
	if (btop < 0) {
		AuDebugOn(bbot >= 0);
		return -EIO;
	}

	AuDebugOn(bbot < 0 && btop > bbot);
	return 0;
}
/*
 * Compare the generation of @dentry and of its inode with @sigen.
 * Returns 0 when both pass, -EIO when the dentry generation differs or
 * au_iigen_test() reports non-zero for the inode.
 */
int au_digen_test(struct dentry *dentry, unsigned int sigen)
{
	if (unlikely(au_digen(dentry) != sigen))
		return -EIO;
	if (unlikely(au_iigen_test(d_inode(dentry), sigen)))
		return -EIO;
	return 0;
}
void au_update_digen(struct dentry *dentry)
{
atomic_set(&au_di(dentry)->di_generation, au_sigen(dentry->d_sb));
/* smp_mb(); */ /* atomic_set */
}
/*
 * Recompute di_btop/di_bbot of @dentry from the lower dentries that are
 * actually set.
 * With @do_put_zero, negative lower dentries inside the current range are
 * dropped first. When no lower dentry remains, both ends become -1.
 * Nothing is done when there is no dinfo or di_btop is already negative.
 * The dinfo must be write-locked.
 */
void au_update_dbrange(struct dentry *dentry, int do_put_zero)
{
struct au_dinfo *dinfo;
struct dentry *h_d;
struct au_hdentry *hdp;
aufs_bindex_t bindex, bbot;
/*
 * NOTE(review): this assertion forms &au_di(dentry)->di_rwsem before the
 * !dinfo test below; if it is compiled in, a NULL dinfo would presumably
 * fault here first. Confirm whether the NULL test is still needed.
 */
DiMustWriteLock(dentry);
dinfo = au_di(dentry);
if (!dinfo || dinfo->di_btop < 0)
return;
if (do_put_zero) {
bbot = dinfo->di_bbot;
bindex = dinfo->di_btop;
hdp = au_hdentry(dinfo, bindex);
for (; bindex <= bbot; bindex++, hdp++) {
h_d = hdp->hd_dentry;
if (h_d && d_is_negative(h_d))
au_set_h_dptr(dentry, bindex, NULL);
}
}
/* search upward from branch 0, not from the old di_btop */
dinfo->di_btop = 0;
hdp = au_hdentry(dinfo, dinfo->di_btop);
for (; dinfo->di_btop <= dinfo->di_bbot; dinfo->di_btop++, hdp++)
if (hdp->hd_dentry)
break;
/* every slot up to di_bbot is empty: the dentry has no range any more */
if (dinfo->di_btop > dinfo->di_bbot) {
dinfo->di_btop = -1;
dinfo->di_bbot = -1;
return;
}
/* at least one slot is set, so this downward search stops at or above di_btop */
hdp = au_hdentry(dinfo, dinfo->di_bbot);
for (; dinfo->di_bbot >= 0; dinfo->di_bbot--, hdp--)
if (hdp->hd_dentry)
break;
AuDebugOn(dinfo->di_btop > dinfo->di_bbot || dinfo->di_bbot < 0);
}
/*
 * Move di_btop down to the first branch whose lower dentry is positive.
 * Negative lower dentries met on the way are released. When no positive
 * lower dentry exists, di_btop is left unchanged.
 */
void au_update_dbtop(struct dentry *dentry)
{
	aufs_bindex_t idx, bbot;
	struct dentry *h_d;

	bbot = au_dbbot(dentry);
	idx = au_dbtop(dentry);
	while (idx <= bbot) {
		h_d = au_h_dptr(dentry, idx);
		if (h_d) {
			if (d_is_positive(h_d)) {
				au_set_dbtop(dentry, idx);
				return;
			}
			au_set_h_dptr(dentry, idx, NULL);
		}
		idx++;
	}
}
/*
 * Move di_bbot up to the last branch whose lower dentry is positive.
 * Negative lower dentries met on the way are released. When no positive
 * lower dentry exists, di_bbot is left unchanged.
 */
void au_update_dbbot(struct dentry *dentry)
{
	aufs_bindex_t idx, btop;
	struct dentry *h_d;

	btop = au_dbtop(dentry);
	idx = au_dbbot(dentry);
	while (idx >= btop) {
		h_d = au_h_dptr(dentry, idx);
		if (h_d) {
			if (d_is_positive(h_d)) {
				au_set_dbbot(dentry, idx);
				return;
			}
			au_set_h_dptr(dentry, idx, NULL);
		}
		idx--;
	}
}
/*
 * Find the branch index in di_btop..di_bbot whose lower dentry is @h_dentry.
 * Returns the index, or -1 when there is none.
 */
int au_find_dbindex(struct dentry *dentry, struct dentry *h_dentry)
{
	aufs_bindex_t idx, bbot;

	bbot = au_dbbot(dentry);
	idx = au_dbtop(dentry);
	while (idx <= bbot) {
		if (au_h_dptr(dentry, idx) == h_dentry)
			return idx;
		idx++;
	}
	return -1;
}

763
fs/aufs/dir.c Normal file
View File

@ -0,0 +1,763 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2005-2020 Junjiro R. Okajima
*
* This program, aufs is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* directory operations
*/
#include <linux/fs_stack.h>
#include <linux/iversion.h>
#include "aufs.h"
/*
 * Add the link count of the lower directory @h_dir, less 2, to the aufs
 * directory @dir.
 * The arithmetic is unsigned: when @h_dir has fewer than 2 links the
 * subtraction wraps, and the extra +2 brings the sum back to the plain
 * addition of its link count.
 */
void au_add_nlink(struct inode *dir, struct inode *h_dir)
{
	unsigned int links;

	AuDebugOn(!S_ISDIR(dir->i_mode) || !S_ISDIR(h_dir->i_mode));

	links = dir->i_nlink + (h_dir->i_nlink - 2);
	if (h_dir->i_nlink < 2)
		links += 2;
	smp_mb(); /* for i_nlink */
	/* 0 can happen while revalidating */
	set_nlink(dir, links);
}
/*
 * Inverse of au_add_nlink(): remove the contribution of the lower directory
 * @h_dir (its link count less 2) from the aufs directory @dir.
 * The arithmetic is unsigned and mirrors au_add_nlink() for lower link
 * counts below 2.
 */
void au_sub_nlink(struct inode *dir, struct inode *h_dir)
{
	unsigned int links;

	AuDebugOn(!S_ISDIR(dir->i_mode) || !S_ISDIR(h_dir->i_mode));

	links = dir->i_nlink - (h_dir->i_nlink - 2);
	if (h_dir->i_nlink < 2)
		links -= 2;
	smp_mb(); /* for i_nlink */
	/* nlink == 0 means the branch-fs is broken */
	set_nlink(dir, links);
}
/*
 * Estimate a buffer size for the directory from the sizes of its lower
 * directories.
 *
 * With @file the sizes of the opened lower files are summed, otherwise those
 * of the positive lower dentries of @dentry (which must then be non-NULL)
 * up to au_dbtaildir(). Summing stops once KMALLOC_MAX_SIZE is reached.
 *
 * The sum is rounded up to a power of two, capped at KMALLOC_MAX_SIZE, and
 * replaced by AUFS_RDBLK_DEF when it is smaller than NAME_MAX.
 */
loff_t au_dir_size(struct file *file, struct dentry *dentry)
{
	loff_t sz;
	aufs_bindex_t bindex, bbot;
	struct file *h_file;
	struct dentry *h_dentry;

	sz = 0;
	if (file) {
		AuDebugOn(!d_is_dir(file->f_path.dentry));

		bbot = au_fbbot_dir(file);
		for (bindex = au_fbtop(file);
		     bindex <= bbot && sz < KMALLOC_MAX_SIZE;
		     bindex++) {
			h_file = au_hf_dir(file, bindex);
			if (h_file && file_inode(h_file))
				sz += vfsub_f_size_read(h_file);
		}
	} else {
		AuDebugOn(!dentry);
		AuDebugOn(!d_is_dir(dentry));

		bbot = au_dbtaildir(dentry);
		for (bindex = au_dbtop(dentry);
		     bindex <= bbot && sz < KMALLOC_MAX_SIZE;
		     bindex++) {
			h_dentry = au_h_dptr(dentry, bindex);
			if (h_dentry && d_is_positive(h_dentry))
				sz += i_size_read(d_inode(h_dentry));
		}
	}

	/*
	 * roundup_pow_of_two() is undefined for 0, and sz is still 0 when no
	 * lower entry contributed a size. Skip the rounding in that case;
	 * the NAME_MAX test below then selects the default.
	 */
	if (sz > 0 && sz < KMALLOC_MAX_SIZE)
		sz = roundup_pow_of_two(sz);
	if (sz > KMALLOC_MAX_SIZE)
		sz = KMALLOC_MAX_SIZE;
	else if (sz < NAME_MAX) {
		BUILD_BUG_ON(AUFS_RDBLK_DEF < NAME_MAX);
		sz = AUFS_RDBLK_DEF;
	}
	return sz;
}
/* work item passed from au_dir_ts() to au_do_dir_ts() */
struct au_dir_ts_arg {
/* referenced by au_dir_ts(), put by au_do_dir_ts() */
struct dentry *dentry;
/* branch id (not index); translated back with au_br_index() in the worker */
aufs_bindex_t brid;
};
/*
 * Worker queued by au_dir_ts() through au_wkq_nowait().
 *
 * It stores the times of the lower directory on the branch identified by
 * a->brid, and reverts them onto the top lower directory when that one is on
 * a writable branch, still linked, and has an older mtime.
 * NOTE(review): "stores/reverts times" is inferred from the names
 * au_dtime_store()/au_dtime_revert(); confirm their exact semantics.
 *
 * The worker owns @arg: it drops the dentry reference, signals the nowait
 * counter and frees the argument on every path.
 */
static void au_do_dir_ts(void *arg)
{
struct au_dir_ts_arg *a = arg;
struct au_dtime dt;
struct path h_path;
struct inode *dir, *h_dir;
struct super_block *sb;
struct au_branch *br;
struct au_hinode *hdir;
int err;
aufs_bindex_t btop, bindex;
sb = a->dentry->d_sb;
/* the dentry lost its inode in the meantime: nothing to update */
if (d_really_is_negative(a->dentry))
goto out;
/* no dir->i_mutex lock */
aufs_read_lock(a->dentry, AuLock_DW); /* noflush */
dir = d_inode(a->dentry);
btop = au_ibtop(dir);
/* look the branch up again by id */
bindex = au_br_index(sb, a->brid);
/* this also covers a negative index returned by the lookup */
if (bindex < btop)
goto out_unlock;
br = au_sbr(sb, bindex);
h_path.dentry = au_h_dptr(a->dentry, bindex);
if (!h_path.dentry)
goto out_unlock;
h_path.mnt = au_br_mnt(br);
au_dtime_store(&dt, a->dentry, &h_path);
/* from here on h_path describes the top branch */
br = au_sbr(sb, btop);
if (!au_br_writable(br->br_perm))
goto out_unlock;
h_path.dentry = au_h_dptr(a->dentry, btop);
h_path.mnt = au_br_mnt(br);
err = vfsub_mnt_want_write(h_path.mnt);
if (err)
goto out_unlock;
hdir = au_hi(dir, btop);
au_hn_inode_lock_nested(hdir, AuLsc_I_PARENT);
h_dir = au_h_iptr(dir, btop);
/* only a still-linked top dir whose mtime is older than the stored one */
if (h_dir->i_nlink
&& timespec64_compare(&h_dir->i_mtime, &dt.dt_mtime) < 0) {
dt.dt_h_path = h_path;
au_dtime_revert(&dt);
}
au_hn_inode_unlock(hdir);
vfsub_mnt_drop_write(h_path.mnt);
au_cpup_attr_timesizes(dir);
out_unlock:
aufs_read_unlock(a->dentry, AuLock_DW);
out:
/* release what au_dir_ts() handed over */
dput(a->dentry);
au_nwt_done(&au_sbi(sb)->si_nowait);
au_kfree_try_rcu(arg);
}
/*
 * Propagate time changes of the lower directory on branch @bindex to the
 * aufs directory @dir.
 *
 * When @bindex is the top branch, the attributes are copied up directly.
 * Otherwise, if the top branch is writable, au_do_dir_ts() is queued to do
 * the work asynchronously. Allocation or queueing failures are not reported
 * to the caller (a queueing failure is only logged).
 */
void au_dir_ts(struct inode *dir, aufs_bindex_t bindex)
{
int perm, wkq_err;
aufs_bindex_t btop;
struct au_dir_ts_arg *arg;
struct dentry *dentry;
struct super_block *sb;
IMustLock(dir);
/* reference #1, always dropped at "out" */
dentry = d_find_any_alias(dir);
AuDebugOn(!dentry);
sb = dentry->d_sb;
btop = au_ibtop(dir);
if (btop == bindex) {
au_cpup_attr_timesizes(dir);
goto out;
}
perm = au_sbr_perm(sb, btop);
if (!au_br_writable(perm))
goto out;
arg = kmalloc(sizeof(*arg), GFP_NOFS);
if (!arg)
goto out;
/* reference #2 belongs to the worker */
arg->dentry = dget(dentry); /* will be dput-ted by au_do_dir_ts() */
/* pass the branch id, which the worker maps back to an index */
arg->brid = au_sbr_id(sb, bindex);
wkq_err = au_wkq_nowait(au_do_dir_ts, arg, sb, /*flags*/0);
if (unlikely(wkq_err)) {
pr_err("wkq %d\n", wkq_err);
/* the worker will never run: drop reference #2 and the argument here */
dput(dentry);
au_kfree_try_rcu(arg);
}
out:
dput(dentry);
}
/* ---------------------------------------------------------------------- */
/*
 * Bring the lower files of the opened directory @file in line with the
 * current branch range of its dentry: lower files outside
 * au_dbtop()..au_dbtaildir() are closed, and missing ones inside the range
 * are opened with the flags of @file.
 * Returns 0, or the error of the first failing au_h_open(); lower files
 * opened before the failure are kept.
 */
static int reopen_dir(struct file *file)
{
	unsigned int flags;
	aufs_bindex_t bindex, btail, btop;
	struct dentry *dentry, *h_dentry;
	struct file *h_file;

	/* open all lower dirs */
	dentry = file->f_path.dentry;

	btop = au_dbtop(dentry);
	bindex = au_fbtop(file);
	while (bindex < btop) {
		au_set_h_fptr(file, bindex, NULL);
		bindex++;
	}
	au_set_fbtop(file, btop);

	btail = au_dbtaildir(dentry);
	bindex = au_fbbot_dir(file);
	while (btail < bindex) {
		au_set_h_fptr(file, bindex, NULL);
		bindex--;
	}
	au_set_fbbot_dir(file, btail);

	flags = vfsub_file_flags(file);
	for (bindex = btop; bindex <= btail; bindex++) {
		h_dentry = au_h_dptr(dentry, bindex);
		/* nothing on this branch, or already opened */
		if (!h_dentry || au_hf_dir(file, bindex))
			continue;

		h_file = au_h_open(dentry, bindex, flags, file, /*force_wr*/0);
		if (IS_ERR(h_file))
			return PTR_ERR(h_file); /* close all? */
		au_set_h_fptr(file, bindex, h_file);
	}
	au_update_figen(file);
	/* todo: necessary? */
	/* file->f_ra = h_file->f_ra; */
	return 0;
}
/*
 * Open callback for directories (see aufs_open_dir()).
 * Opens the lower directory on every branch in au_dbtop()..au_dbtaildir()
 * that has a lower dentry. @h_file must be NULL on entry and is only used as
 * a local. @file must be write-locked.
 * On failure every lower file is closed again and the file's branch range is
 * reset to -1.
 */
static int do_open_dir(struct file *file, int flags, struct file *h_file)
{
	int err;
	aufs_bindex_t bindex, btail;
	struct dentry *dentry, *h_dentry;
	struct vfsmount *mnt;

	FiMustWriteLock(file);
	AuDebugOn(h_file);

	mnt = file->f_path.mnt;
	dentry = file->f_path.dentry;
	file->f_version = inode_query_iversion(d_inode(dentry));
	bindex = au_dbtop(dentry);
	au_set_fbtop(file, bindex);
	btail = au_dbtaildir(dentry);
	au_set_fbbot_dir(file, btail);

	err = 0;
	while (bindex <= btail) {
		h_dentry = au_h_dptr(dentry, bindex);
		if (h_dentry) {
			err = vfsub_test_mntns(mnt, h_dentry->d_sb);
			if (unlikely(err))
				break;
			h_file = au_h_open(dentry, bindex, flags, file,
					   /*force_wr*/0);
			if (IS_ERR(h_file)) {
				err = PTR_ERR(h_file);
				break;
			}
			au_set_h_fptr(file, bindex, h_file);
		}
		bindex++;
	}
	au_update_figen(file);
	/* todo: necessary? */
	/* file->f_ra = h_file->f_ra; */
	if (!err)
		return 0; /* success */

	/* close all */
	for (bindex = au_fbtop(file); bindex <= btail; bindex++)
		au_set_h_fptr(file, bindex, NULL);
	au_set_fbtop(file, -1);
	au_set_fbbot_dir(file, -1);

	return err;
}
/*
 * ->open() for directories.
 * Allocates the per-file directory info and lets au_do_open() run
 * do_open_dir() with it. The info is freed again when the open fails.
 * Returns 0, -ENOMEM, or the error from au_do_open().
 */
static int aufs_open_dir(struct inode *inode __maybe_unused,
			 struct file *file)
{
	int err;
	struct super_block *sb;
	struct au_fidir *fidir;
	struct au_do_open_args args = {
		.open	= do_open_dir
	};

	sb = file->f_path.dentry->d_sb;
	si_read_lock(sb, AuLock_FLUSH);

	err = -ENOMEM;
	fidir = au_fidir_alloc(sb);
	if (!fidir)
		goto out;

	args.fidir = fidir;
	err = au_do_open(file, &args);
	if (unlikely(err))
		au_kfree_rcu(fidir);

out:
	si_read_unlock(sb);
	return err;
}
/*
 * ->release() for directories.
 * When directory info exists: unlink the file from the superblock's file
 * list, free the cached virtual directory, put every opened lower file and
 * free the directory info. The file info is finalised in all cases.
 * Always returns 0.
 */
static int aufs_release_dir(struct inode *inode __maybe_unused,
			    struct file *file)
{
	struct au_finfo *finfo;
	struct au_fidir *fidir;
	struct au_vdir *vdir;
	struct au_hfile *hf;
	aufs_bindex_t bindex, bbot;

	finfo = au_fi(file);
	fidir = finfo->fi_hdir;
	if (!fidir)
		goto fin;

	au_hbl_del(&finfo->fi_hlist,
		   &au_sbi(file->f_path.dentry->d_sb)->si_files);
	vdir = fidir->fd_vdir_cache; /* lock-free */
	if (vdir)
		au_vdir_free(vdir);

	bindex = finfo->fi_btop;
	if (bindex >= 0) {
		/*
		 * calls fput() instead of filp_close(),
		 * since no dnotify or lock for the lower file.
		 */
		bbot = fidir->fd_bbot;
		for (; bindex <= bbot; bindex++) {
			hf = fidir->fd_hfile + bindex;
			if (hf->hf_file)
				au_hfput(hf, /*execed*/0);
		}
	}
	au_kfree_rcu(fidir);
	finfo->fi_hdir = NULL;

fin:
	au_finfo_fin(file);
	return 0;
}
/* ---------------------------------------------------------------------- */
/*
 * Flush every opened lower directory file of @file on behalf of @id.
 * Stops at, and returns, the first non-zero result of vfsub_flush().
 */
static int au_do_flush_dir(struct file *file, fl_owner_t id)
{
	int err;
	aufs_bindex_t bindex, bbot;
	struct file *h_file;

	bbot = au_fbbot_dir(file);
	for (bindex = au_fbtop(file); bindex <= bbot; bindex++) {
		h_file = au_hf_dir(file, bindex);
		if (!h_file)
			continue;
		err = vfsub_flush(h_file, id);
		if (err)
			return err;
	}
	return 0;
}
/* ->flush() for directories: au_do_flush() driven by au_do_flush_dir(). */
static int aufs_flush_dir(struct file *file, fl_owner_t id)
{
	int err;

	err = au_do_flush(file, id, au_do_flush_dir);
	return err;
}
/* ---------------------------------------------------------------------- */
/*
 * fsync the lower directories of @dentry without an opened file.
 * Branches for which au_test_ro() reports non-zero and branches without a
 * lower dentry are skipped. Stops at the first error and returns it.
 * The inode must be locked by the caller.
 */
static int au_do_fsync_dir_no_file(struct dentry *dentry, int datasync)
{
	int err;
	aufs_bindex_t bbot, bindex;
	struct inode *inode;
	struct super_block *sb;
	struct path h_path;

	err = 0;
	sb = dentry->d_sb;
	inode = d_inode(dentry);
	IMustLock(inode);

	bbot = au_dbbot(dentry);
	for (bindex = au_dbtop(dentry); bindex <= bbot; bindex++) {
		if (au_test_ro(sb, bindex, inode))
			continue;
		h_path.dentry = au_h_dptr(dentry, bindex);
		if (!h_path.dentry)
			continue;

		h_path.mnt = au_sbr_mnt(sb, bindex);
		err = vfsub_fsync(NULL, &h_path, datasync);
		if (err)
			break;
	}

	return err;
}
/*
 * fsync every opened lower directory file of @file, skipping branches for
 * which au_test_ro() reports non-zero.
 * The caller has already revalidated and write-locked @file and its dentry
 * with au_reval_and_lock_fdi(). Stops at the first error and returns it.
 */
static int au_do_fsync_dir(struct file *file, int datasync)
{
	int err;
	aufs_bindex_t bbot, bindex;
	struct file *h_file;
	struct super_block *sb;
	struct inode *inode;

	err = 0;
	inode = file_inode(file);
	sb = inode->i_sb;
	bbot = au_fbbot_dir(file);
	for (bindex = au_fbtop(file); !err && bindex <= bbot; bindex++) {
		h_file = au_hf_dir(file, bindex);
		if (!h_file || au_test_ro(sb, bindex, inode))
			continue;

		err = vfsub_fsync(h_file, &h_file->f_path, datasync);
	}

	return err;
}

/*
 * ->fsync() for directories; @start and @end are not used.
 *
 * Locking: inode lock, then si read lock, then the di/fi write locks taken
 * by au_reval_and_lock_fdi(). When that call fails it returns with neither
 * di nor fi locked (aufs_iterate_shared() relies on the same contract), so
 * the failure path must not copy up attributes or unlock them.
 *
 * NOTE: @file is dereferenced before the "if (file)" tests below, so a NULL
 * @file is not survivable here. The no-file branch is kept as it was.
 */
static int aufs_fsync_dir(struct file *file, loff_t start, loff_t end,
			  int datasync)
{
	int err;
	struct dentry *dentry;
	struct inode *inode;
	struct super_block *sb;

	dentry = file->f_path.dentry;
	inode = d_inode(dentry);
	inode_lock(inode);
	sb = dentry->d_sb;
	si_noflush_read_lock(sb);
	if (file) {
		err = au_reval_and_lock_fdi(file, reopen_dir, /*wlock*/1,
					    /*fi_lsc*/0);
		if (unlikely(err))
			goto out; /* di and fi are not locked */
		err = au_do_fsync_dir(file, datasync);
	} else {
		di_write_lock_child(dentry);
		err = au_do_fsync_dir_no_file(dentry, datasync);
	}
	au_cpup_attr_timesizes(inode);
	di_write_unlock(dentry);
	if (file)
		fi_write_unlock(file);

out:
	si_read_unlock(sb);
	inode_unlock(inode);
	return err;
}
/* ---------------------------------------------------------------------- */
/*
 * ->iterate_shared() for directories.
 *
 * Revalidates the file, builds the virtual directory with au_vdir_init() and
 * emits its entries through au_vdir_fill_de().
 *
 * Lock handling differs by caller:
 * - normally the entries are emitted with the di read lock, the fi write lock
 *   and the si read lock held, and all are dropped at the end;
 * - when au_test_nfsd() is true, the di and si locks are dropped before
 *   emitting (only the fi lock and a reference on the top lower inode are
 *   kept), and that path returns directly.
 */
static int aufs_iterate_shared(struct file *file, struct dir_context *ctx)
{
int err;
struct dentry *dentry;
struct inode *inode, *h_inode;
struct super_block *sb;
AuDbg("%pD, ctx{%ps, %llu}\n", file, ctx->actor, ctx->pos);
dentry = file->f_path.dentry;
inode = d_inode(dentry);
IMustLock(inode);
sb = dentry->d_sb;
si_read_lock(sb, AuLock_FLUSH);
err = au_reval_and_lock_fdi(file, reopen_dir, /*wlock*/1, /*fi_lsc*/0);
/* on failure only the si lock is held, hence "out" and not "out_unlock" */
if (unlikely(err))
goto out;
err = au_alive_dir(dentry);
if (!err)
err = au_vdir_init(file);
/* downgrade first so that "out_unlock" can always use the read unlock */
di_downgrade_lock(dentry, AuLock_IR);
if (unlikely(err))
goto out_unlock;
h_inode = au_h_iptr(inode, au_ibtop(inode));
if (!au_test_nfsd()) {
err = au_vdir_fill_de(file, ctx);
fsstack_copy_attr_atime(inode, h_inode);
} else {
/*
* nfsd filldir may call lookup_one_len(), vfs_getattr(),
* encode_fh() and others.
*/
/* pin h_inode, since it is used after the di lock is dropped */
atomic_inc(&h_inode->i_count);
di_read_unlock(dentry, AuLock_IR);
si_read_unlock(sb);
err = au_vdir_fill_de(file, ctx);
fsstack_copy_attr_atime(inode, h_inode);
fi_write_unlock(file);
iput(h_inode);
AuTraceErr(err);
return err;
}
out_unlock:
di_read_unlock(dentry, AuLock_IR);
fi_write_unlock(file);
out:
si_read_unlock(sb);
return err;
}
/* ---------------------------------------------------------------------- */
/* flag bits kept in test_empty_arg.flags */
/* WHONLY: a name that is not a known whiteout makes the directory non-empty */
#define AuTestEmpty_WHONLY 1
/* CALLED: set by test_empty_cb() each time it runs */
#define AuTestEmpty_CALLED (1 << 1)
/* SHWH: forwarded to au_nhash_append_wh() */
#define AuTestEmpty_SHWH (1 << 2)
/* test, set and clear one AuTestEmpty_<name> bit */
#define au_ftest_testempty(flags, name) ((flags) & AuTestEmpty_##name)
#define au_fset_testempty(flags, name) \
do { (flags) |= AuTestEmpty_##name; } while (0)
#define au_fclr_testempty(flags, name) \
do { (flags) &= ~AuTestEmpty_##name; } while (0)
/* without CONFIG_AUFS_SHWH the SHWH bit is 0: tests yield 0, set/clear do nothing */
#ifndef CONFIG_AUFS_SHWH
#undef AuTestEmpty_SHWH
#define AuTestEmpty_SHWH 0
#endif
/* state shared between the emptiness tests and test_empty_cb() */
struct test_empty_arg {
/* embedded so that the callback can recover this struct with container_of() */
struct dir_context ctx;
/* whiteout names collected so far */
struct au_nhash *whlist;
/* AuTestEmpty_* bits */
unsigned int flags;
/* result of the last callback invocation */
int err;
/* branch currently being scanned */
aufs_bindex_t bindex;
};
/*
 * dir_context actor used while testing whether a lower directory is empty.
 *
 * - "." and ".." are ignored.
 * - A name carrying the whiteout prefix is recorded in the whiteout list
 *   (prefix stripped) unless it is already known.
 * - Any other name is ignored, except in WHONLY mode where a name that is not
 *   covered by a known whiteout yields -ENOTEMPTY.
 *
 * The result is stored in arg->err and also returned.
 * NOTE(review): a non-zero return presumably stops the lower iteration;
 * confirm against the VFS actor contract of the target kernel.
 */
static int test_empty_cb(struct dir_context *ctx, const char *__name,
int namelen, loff_t offset __maybe_unused, u64 ino,
unsigned int d_type)
{
struct test_empty_arg *arg = container_of(ctx, struct test_empty_arg,
ctx);
/* const is dropped only to match the helper signatures; name is not written */
char *name = (void *)__name;
arg->err = 0;
/* tell do_test_empty() that the iteration produced something this round */
au_fset_testempty(arg->flags, CALLED);
/* smp_mb(); */
if (name[0] == '.'
&& (namelen == 1 || (name[1] == '.' && namelen == 2)))
goto out; /* success */
/* not a whiteout: too short for the prefix, or the prefix differs */
if (namelen <= AUFS_WH_PFX_LEN
|| memcmp(name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) {
if (au_ftest_testempty(arg->flags, WHONLY)
&& !au_nhash_test_known_wh(arg->whlist, name, namelen))
arg->err = -ENOTEMPTY;
goto out;
}
/* a whiteout: remember the name it hides */
name += AUFS_WH_PFX_LEN;
namelen -= AUFS_WH_PFX_LEN;
if (!au_nhash_test_known_wh(arg->whlist, name, namelen))
arg->err = au_nhash_append_wh
(arg->whlist, name, namelen, ino, d_type, arg->bindex,
au_ftest_testempty(arg->flags, SHWH));
out:
/* smp_mb(); */
AuTraceErr(arg->err);
return arg->err;
}
/*
 * Scan the lower directory of @dentry on branch arg->bindex with
 * test_empty_cb().
 * The directory is iterated repeatedly until a pass invokes the callback no
 * more or an error shows up. Without the UDBA_NONE option, an unlinked lower
 * directory (i_nlink == 0) is not scanned at all and counts as success.
 * The lower file is put and the branch's file counter decremented before
 * returning.
 */
static int do_test_empty(struct dentry *dentry, struct test_empty_arg *arg)
{
	int err;
	struct file *h_file;
	struct au_branch *br;

	h_file = au_h_open(dentry, arg->bindex,
			   O_RDONLY | O_NONBLOCK | O_DIRECTORY | O_LARGEFILE,
			   /*file*/NULL, /*force_wr*/0);
	if (IS_ERR(h_file))
		return PTR_ERR(h_file);

	err = 0;
	if (au_opt_test(au_mntflags(dentry->d_sb), UDBA_NONE)
	    || file_inode(h_file)->i_nlink) {
		do {
			arg->err = 0;
			au_fclr_testempty(arg->flags, CALLED);
			/* smp_mb(); */
			err = vfsub_iterate_dir(h_file, &arg->ctx);
			if (err >= 0)
				err = arg->err;
		} while (!err && au_ftest_testempty(arg->flags, CALLED));
	}

	fput(h_file);
	br = au_sbr(dentry->d_sb, arg->bindex);
	au_lcnt_dec(&br->br_nfiles);
	return err;
}
/* argument bundle for running do_test_empty() through au_wkq_wait() */
struct do_test_empty_args {
/* where the result of do_test_empty() is stored */
int *errp;
struct dentry *dentry;
struct test_empty_arg *arg;
};
/* Workqueue adaptor: unpack @args, run do_test_empty(), store its result. */
static void call_do_test_empty(void *args)
{
	struct do_test_empty_args *a = args;
	int err;

	err = do_test_empty(a->dentry, a->arg);
	*a->errp = err;
}
/*
 * Run do_test_empty() for branch arg->bindex.
 * When au_test_h_perm_sio() reports 0 for MAY_EXEC | MAY_READ on the lower
 * directory, the scan runs directly. Otherwise it is delegated to
 * au_wkq_wait(); in that case arg->flags is restored to its value from
 * before the delegated run.
 * Returns the scan result, or the workqueue error when delegation failed.
 */
static int sio_test_empty(struct dentry *dentry, struct test_empty_arg *arg)
{
	int err, wkq_err;
	unsigned int saved_flags;
	struct inode *h_inode;
	struct do_test_empty_args args = {
		.errp	= &err,
		.dentry	= dentry,
		.arg	= arg
	};

	h_inode = d_inode(au_h_dptr(dentry, arg->bindex));
	/* todo: i_mode changes anytime? */
	inode_lock_shared_nested(h_inode, AuLsc_I_CHILD);
	err = au_test_h_perm_sio(h_inode, MAY_EXEC | MAY_READ);
	inode_unlock_shared(h_inode);
	if (!err)
		return do_test_empty(dentry, arg);

	saved_flags = arg->flags;
	wkq_err = au_wkq_wait(call_do_test_empty, &args);
	if (unlikely(wkq_err))
		err = wkq_err;
	arg->flags = saved_flags;

	return err;
}
/*
 * Test whether the directory @dentry is logically empty, using a private
 * whiteout list.
 *
 * The top branch is scanned first without WHONLY, which only collects its
 * whiteouts. The remaining branches up to au_dbtaildir() are then scanned
 * with WHONLY, so any name not hidden by a collected whiteout yields
 * -ENOTEMPTY. With the DIRPERM1 option the scans go through
 * sio_test_empty(). The si lock must be held.
 * Returns 0 when empty, otherwise the first error.
 */
int au_test_empty_lower(struct dentry *dentry)
{
int err;
unsigned int rdhash;
aufs_bindex_t bindex, btop, btail;
struct au_nhash whlist;
struct test_empty_arg arg = {
.ctx = {
.actor = test_empty_cb
}
};
int (*test_empty)(struct dentry *dentry, struct test_empty_arg *arg);
SiMustAnyLock(dentry->d_sb);
/* hash size: the configured si_rdhash, else an estimate from the dir size */
rdhash = au_sbi(dentry->d_sb)->si_rdhash;
if (!rdhash)
rdhash = au_rdhash_est(au_dir_size(/*file*/NULL, dentry));
err = au_nhash_alloc(&whlist, rdhash, GFP_NOFS);
if (unlikely(err))
goto out;
arg.flags = 0;
arg.whlist = &whlist;
btop = au_dbtop(dentry);
if (au_opt_test(au_mntflags(dentry->d_sb), SHWH))
au_fset_testempty(arg.flags, SHWH);
test_empty = do_test_empty;
if (au_opt_test(au_mntflags(dentry->d_sb), DIRPERM1))
test_empty = sio_test_empty;
/* first pass: the top branch, collecting whiteouts only */
arg.bindex = btop;
err = test_empty(dentry, &arg);
if (unlikely(err))
goto out_whlist;
/* from now on a visible name means "not empty" */
au_fset_testempty(arg.flags, WHONLY);
btail = au_dbtaildir(dentry);
for (bindex = btop + 1; !err && bindex <= btail; bindex++) {
struct dentry *h_dentry;
h_dentry = au_h_dptr(dentry, bindex);
if (h_dentry && d_is_positive(h_dentry)) {
arg.bindex = bindex;
err = test_empty(dentry, &arg);
}
}
out_whlist:
au_nhash_wh_free(&whlist);
out:
return err;
}
/*
 * Scan every positive branch of @dentry, from au_dbtop() to au_dbtaildir(),
 * through sio_test_empty() with the WHONLY flag set and the caller's
 * @whlist. The scan stops at the first error, which is returned; zero is
 * returned otherwise.
 */
int au_test_empty(struct dentry *dentry, struct au_nhash *whlist)
{
	int err = 0;
	aufs_bindex_t bindex, btail;
	struct dentry *h_dentry;
	struct test_empty_arg arg = {
		.ctx = {
			.actor = test_empty_cb
		}
	};

	arg.whlist = whlist;
	arg.flags = AuTestEmpty_WHONLY;
	if (au_opt_test(au_mntflags(dentry->d_sb), SHWH))
		au_fset_testempty(arg.flags, SHWH);

	btail = au_dbtaildir(dentry);
	for (bindex = au_dbtop(dentry); bindex <= btail; bindex++) {
		h_dentry = au_h_dptr(dentry, bindex);
		if (!h_dentry || !d_is_positive(h_dentry))
			continue;

		arg.bindex = bindex;
		err = sio_test_empty(dentry, &arg);
		if (err)
			break;
	}

	return err;
}
/* ---------------------------------------------------------------------- */
/*
 * file operations table for aufs directories.
 * ->llseek and ->read use the generic helpers default_llseek() and
 * generic_read_dir(), the other members point to aufs functions.
 * ->compat_ioctl is set only when CONFIG_COMPAT is enabled.
 */
const struct file_operations aufs_dir_fop = {
.owner = THIS_MODULE,
.llseek = default_llseek,
.read = generic_read_dir,
.iterate_shared = aufs_iterate_shared,
.unlocked_ioctl = aufs_ioctl_dir,
#ifdef CONFIG_COMPAT
.compat_ioctl = aufs_compat_ioctl_dir,
#endif
.open = aufs_open_dir,
.release = aufs_release_dir,
.flush = aufs_flush_dir,
.fsync = aufs_fsync_dir
};

134
fs/aufs/dir.h Normal file
View File

@ -0,0 +1,134 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2005-2020 Junjiro R. Okajima
*
* This program, aufs is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* directory operations
*/
#ifndef __AUFS_DIR_H__
#define __AUFS_DIR_H__
#ifdef __KERNEL__
#include <linux/fs.h>
/* ---------------------------------------------------------------------- */
/* need to be faster and smaller */
/*
 * a name hash table, set up by au_nhash_alloc().
 * presumably nh_head is an array of nh_num list heads -- confirm in vdir.c.
 */
struct au_nhash {
unsigned int nh_num;
struct hlist_head *nh_head;
};
/*
 * a name stored as a one-byte length followed by the characters in a
 * flexible array member. packed.
 */
struct au_vdir_destr {
unsigned char len;
unsigned char name[];
} __packed;
/*
 * a hash list node which refers to a struct au_vdir_destr by pointer.
 * the embedded rcu_head suggests the node is released via RCU -- confirm in
 * vdir.c.
 */
struct au_vdir_dehstr {
struct hlist_node hash;
struct au_vdir_destr *str;
struct rcu_head rcu;
} ____cacheline_aligned_in_smp;
/*
 * a directory entry: inode number, type and the name.
 * de_str ends with a flexible array, so it has to be the last member.
 */
struct au_vdir_de {
ino_t de_ino;
unsigned char de_type;
/* caution: packed */
struct au_vdir_destr de_str;
} __packed;
/*
 * a whiteout entry linked into a hash list.
 * the inode number and the type are kept only when CONFIG_AUFS_SHWH is
 * enabled, the branch index is always kept.
 * wh_str ends with a flexible array, so it has to be the last member.
 */
struct au_vdir_wh {
struct hlist_node wh_hash;
#ifdef CONFIG_AUFS_SHWH
ino_t wh_ino;
aufs_bindex_t wh_bindex;
unsigned char wh_type;
#else
aufs_bindex_t wh_bindex;
#endif
/* caution: packed */
struct au_vdir_destr wh_str;
} __packed;
/* one position viewed either as a raw byte pointer or as an entry pointer */
union au_vdir_deblk_p {
unsigned char *deblk;
struct au_vdir_de *de;
};
/*
 * the virtual directory.
 * NOTE(review): the meaning of the members below is guessed from their names
 * and types only, confirm in vdir.c.
 * - vd_deblk, vd_nblk: presumably an array of entry blocks and its length
 * - vd_last: presumably the last used block index and a position in it
 * - vd_deblk_sz: presumably the size of a single block
 * - vd_version, vd_jiffy: presumably used to decide whether this vdir is
 *   still up to date
 */
struct au_vdir {
unsigned char **vd_deblk;
unsigned long vd_nblk;
struct {
unsigned long ul;
union au_vdir_deblk_p p;
} vd_last;
u64 vd_version;
unsigned int vd_deblk_sz;
unsigned long vd_jiffy;
struct rcu_head rcu;
} ____cacheline_aligned_in_smp;
/* ---------------------------------------------------------------------- */
/* dir.c */
extern const struct file_operations aufs_dir_fop;
void au_add_nlink(struct inode *dir, struct inode *h_dir);
void au_sub_nlink(struct inode *dir, struct inode *h_dir);
loff_t au_dir_size(struct file *file, struct dentry *dentry);
void au_dir_ts(struct inode *dir, aufs_bindex_t bsrc);
/* scans with a whiteout list which is allocated internally */
int au_test_empty_lower(struct dentry *dentry);
/* scans with the whiteout list given by the caller */
int au_test_empty(struct dentry *dentry, struct au_nhash *whlist);
/* vdir.c */
unsigned int au_rdhash_est(loff_t sz);
int au_nhash_alloc(struct au_nhash *nhash, unsigned int num_hash, gfp_t gfp);
void au_nhash_wh_free(struct au_nhash *whlist);
int au_nhash_test_longer_wh(struct au_nhash *whlist, aufs_bindex_t btgt,
int limit);
int au_nhash_test_known_wh(struct au_nhash *whlist, char *name, int nlen);
int au_nhash_append_wh(struct au_nhash *whlist, char *name, int nlen, ino_t ino,
unsigned int d_type, aufs_bindex_t bindex,
unsigned char shwh);
void au_vdir_free(struct au_vdir *vdir);
int au_vdir_init(struct file *file);
int au_vdir_fill_de(struct file *file, struct dir_context *ctx);
/* ioctl.c */
long aufs_ioctl_dir(struct file *file, unsigned int cmd, unsigned long arg);
/*
 * the rdu ioctls are declared when CONFIG_AUFS_RDU is enabled.
 * otherwise AuStub() is given "return -EINVAL" as the body; presumably it
 * expands to an inline replacement -- AuStub() is defined elsewhere.
 */
#ifdef CONFIG_AUFS_RDU
/* rdu.c */
long au_rdu_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
#ifdef CONFIG_COMPAT
long au_rdu_compat_ioctl(struct file *file, unsigned int cmd,
unsigned long arg);
#endif
#else
AuStub(long, au_rdu_ioctl, return -EINVAL, struct file *file,
unsigned int cmd, unsigned long arg)
#ifdef CONFIG_COMPAT
AuStub(long, au_rdu_compat_ioctl, return -EINVAL, struct file *file,
unsigned int cmd, unsigned long arg)
#endif
#endif
#endif /* __KERNEL__ */
#endif /* __AUFS_DIR_H__ */

1316
fs/aufs/dirren.c Normal file

File diff suppressed because it is too large Load Diff

140
fs/aufs/dirren.h Normal file
View File

@ -0,0 +1,140 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2017-2020 Junjiro R. Okajima
*
* This program, aufs is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* renamed dir info
*/
#ifndef __AUFS_DIRREN_H__
#define __AUFS_DIRREN_H__
#ifdef __KERNEL__
#include <linux/dcache.h>
#include <linux/statfs.h>
#include <linux/uuid.h>
#include "hbl.h"
/*
 * the number of hash buckets in struct au_dr_br.dr_h_ino[], and the modulus
 * applied by au_dr_ihash()
 */
#define AuDirren_NHASH 100
#ifdef CONFIG_AUFS_DIRREN
/*
 * selects which member of the union in struct au_dr_brid is meaningful.
 * AuBrid_Unset is the zero value.
 */
enum au_brid_type {
AuBrid_Unset,
AuBrid_UUID,
AuBrid_FSID,
AuBrid_DEV
};
/*
 * a branch id, expressed as a uuid, a fsid or a device number.
 * presumably @type tells which union member is in use -- confirm in dirren.c.
 */
struct au_dr_brid {
enum au_brid_type type;
union {
uuid_t uuid; /* unimplemented yet */
fsid_t fsid;
dev_t dev;
};
};
/* 20 is the max digits length of ulong 64 */
/* brid-type "_" uuid "_" inum */
#define AUFS_DIRREN_FNAME_SZ (1 + 1 + UUID_STRING_LEN + 20)
#define AUFS_DIRREN_ENV_VAL_SZ (AUFS_DIRREN_FNAME_SZ + 1 + 20)
/* an inode number on a branch, linked into one of au_dr_br.dr_h_ino[] lists */
struct au_dr_hino {
struct hlist_bl_node dr_hnode;
ino_t dr_h_ino;
};
/*
 * per-branch info: AuDirren_NHASH hash lists of inode numbers (see
 * au_dr_ihash() for the bucket index) and the branch id
 */
struct au_dr_br {
struct hlist_bl_head dr_h_ino[AuDirren_NHASH];
struct au_dr_brid dr_brid;
};
/*
 * lookup state for renamed dirs.
 * presumably @drinfo is an array of @ninfo pointers -- confirm in dirren.c.
 */
struct au_dr_lookup {
/* dr_name is pointed by struct au_do_lookup_args.name */
struct qstr dr_name; /* subset of dr_info */
aufs_bindex_t ninfo;
struct au_drinfo **drinfo;
};
#else
/*
 * CONFIG_AUFS_DIRREN is disabled: au_dr_hino stays an incomplete type and
 * the other two structures are defined without any member.
 */
struct au_dr_hino;
/* empty */
struct au_dr_br { };
struct au_dr_lookup { };
#endif
/* ---------------------------------------------------------------------- */
struct au_branch;
struct au_do_lookup_args;
struct au_hinode;
/*
 * the dirren functions are declared when CONFIG_AUFS_DIRREN is enabled.
 * otherwise each of them is supplied via AuStubInt0() or AuStubVoid(), which
 * are defined elsewhere; judging from the names the stubs return zero or
 * nothing -- confirm at the macro definitions.
 */
#ifdef CONFIG_AUFS_DIRREN
int au_dr_hino_test_add(struct au_dr_br *dr, ino_t h_ino,
struct au_dr_hino *add_ent);
void au_dr_hino_free(struct au_dr_br *dr);
int au_dr_br_init(struct super_block *sb, struct au_branch *br,
const struct path *path);
int au_dr_br_fin(struct super_block *sb, struct au_branch *br);
int au_dr_rename(struct dentry *src, aufs_bindex_t bindex,
struct qstr *dst_name, void *_rev);
void au_dr_rename_fin(struct dentry *src, aufs_bindex_t btgt, void *rev);
void au_dr_rename_rev(struct dentry *src, aufs_bindex_t bindex, void *rev);
int au_dr_lkup(struct au_do_lookup_args *lkup, struct dentry *dentry,
aufs_bindex_t bindex);
int au_dr_lkup_name(struct au_do_lookup_args *lkup, aufs_bindex_t btgt);
int au_dr_lkup_h_ino(struct au_do_lookup_args *lkup, aufs_bindex_t bindex,
ino_t h_ino);
void au_dr_lkup_fin(struct au_do_lookup_args *lkup);
int au_dr_opt_set(struct super_block *sb);
int au_dr_opt_flush(struct super_block *sb);
int au_dr_opt_clr(struct super_block *sb, int no_flush);
#else
AuStubInt0(au_dr_hino_test_add, struct au_dr_br *dr, ino_t h_ino,
struct au_dr_hino *add_ent);
AuStubVoid(au_dr_hino_free, struct au_dr_br *dr);
AuStubInt0(au_dr_br_init, struct super_block *sb, struct au_branch *br,
const struct path *path);
AuStubInt0(au_dr_br_fin, struct super_block *sb, struct au_branch *br);
AuStubInt0(au_dr_rename, struct dentry *src, aufs_bindex_t bindex,
struct qstr *dst_name, void *_rev);
AuStubVoid(au_dr_rename_fin, struct dentry *src, aufs_bindex_t btgt, void *rev);
AuStubVoid(au_dr_rename_rev, struct dentry *src, aufs_bindex_t bindex,
void *rev);
AuStubInt0(au_dr_lkup, struct au_do_lookup_args *lkup, struct dentry *dentry,
aufs_bindex_t bindex);
AuStubInt0(au_dr_lkup_name, struct au_do_lookup_args *lkup, aufs_bindex_t btgt);
AuStubInt0(au_dr_lkup_h_ino, struct au_do_lookup_args *lkup,
aufs_bindex_t bindex, ino_t h_ino);
AuStubVoid(au_dr_lkup_fin, struct au_do_lookup_args *lkup);
AuStubInt0(au_dr_opt_set, struct super_block *sb);
AuStubInt0(au_dr_opt_flush, struct super_block *sb);
AuStubInt0(au_dr_opt_clr, struct super_block *sb, int no_flush);
#endif
/* ---------------------------------------------------------------------- */
#ifdef CONFIG_AUFS_DIRREN
/* map @h_ino to the index of its hash list, 0 .. AuDirren_NHASH - 1 */
static inline int au_dr_ihash(ino_t h_ino)
{
	int bucket = h_ino % AuDirren_NHASH;

	return bucket;
}
#else
AuStubInt0(au_dr_ihash, ino_t h_ino);
#endif
#endif /* __KERNEL__ */
#endif /* __AUFS_DIRREN_H__ */

367
fs/aufs/dynop.c Normal file
View File

@ -0,0 +1,367 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2010-2020 Junjiro R. Okajima
*
* This program, aufs is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* dynamically customizable operations for regular files
*/
#include "aufs.h"
/* pass the lower operation pointer stored in @key to AuDbgSym() */
#define DyPrSym(key) AuDbgSym(key->dk_op.dy_hop)
/*
 * How large will these lists be?
 * Usually just a few elements, 20-30 at most for each, I guess.
 */
/* the global lists of keys, one list per AuDy_xxx type */
static struct hlist_bl_head dynop[AuDyLast];
static struct au_dykey *dy_gfind_get(struct hlist_bl_head *hbl,
const void *h_op)
{
struct au_dykey *key, *tmp;
struct hlist_bl_node *pos;
key = NULL;
hlist_bl_lock(hbl);
hlist_bl_for_each_entry(tmp, pos, hbl, dk_hnode)
if (tmp->dk_op.dy_hop == h_op) {
if (kref_get_unless_zero(&tmp->dk_kref))
key = tmp;
break;
}
hlist_bl_unlock(hbl);
return key;
}
/*
 * Put @key into the branch local array br->br_dykey[] unless an element with
 * the same dy_hop is already there.
 * The array is scanned without the lock first, up to the first unused slot.
 * When nothing matched, the scan goes on from that slot under
 * br->br_dykey_lock and @key is stored into the first unused slot.
 * Returns the element which was already stored, or NULL when @key has been
 * stored. It is a BUG when the array is full.
 */
static struct au_dykey *dy_bradd(struct au_branch *br, struct au_dykey *key)
{
	struct au_dykey **slot = br->br_dykey;
	struct au_dykey *found = NULL;
	const void *h_op = key->dk_op.dy_hop;
	int i;

	/* lockless pass */
	for (i = 0; i < AuBrDynOp; i++) {
		if (!slot[i])
			break;
		if (slot[i]->dk_op.dy_hop == h_op)
			return slot[i];
	}

	/* locked pass, from where the lockless one stopped */
	spin_lock(&br->br_dykey_lock);
	for (; i < AuBrDynOp; i++) {
		if (!slot[i]) {
			slot[i] = key;
			break;
		}
		if (slot[i]->dk_op.dy_hop == h_op) {
			found = slot[i];
			break;
		}
	}
	spin_unlock(&br->br_dykey_lock);
	BUG_ON(i == AuBrDynOp); /* expand the array */

	return found;
}
/* kref_get() if @key is already added */
static struct au_dykey *dy_gadd(struct hlist_bl_head *hbl, struct au_dykey *key)
{
struct au_dykey *tmp, *found;
struct hlist_bl_node *pos;
const void *h_op = key->dk_op.dy_hop;
found = NULL;
hlist_bl_lock(hbl);
hlist_bl_for_each_entry(tmp, pos, hbl, dk_hnode)
if (tmp->dk_op.dy_hop == h_op) {
if (kref_get_unless_zero(&tmp->dk_kref))
found = tmp;
break;
}
if (!found)
hlist_bl_add_head(&key->dk_hnode, hbl);
hlist_bl_unlock(hbl);
if (!found)
DyPrSym(key);
return found;
}
/* RCU callback scheduled by dy_free(): print the symbol and free the key */
static void dy_free_rcu(struct rcu_head *rcu)
{
	struct au_dykey *key = container_of(rcu, struct au_dykey, dk_rcu);

	DyPrSym(key);
	kfree(key);
}
/*
 * kref release function: remove the key from the global list of its type and
 * free it through call_rcu().
 */
static void dy_free(struct kref *kref)
{
	struct au_dykey *key = container_of(kref, struct au_dykey, dk_kref);

	au_hbl_del(&key->dk_hnode, &dynop[key->dk_op.dy_type]);
	call_rcu(&key->dk_rcu, dy_free_rcu);
}
/* drop a reference of @key, dy_free() is called for the last one */
void au_dy_put(struct au_dykey *key)
{
kref_put(&key->dk_kref, dy_free);
}
/* ---------------------------------------------------------------------- */
/*
 * debugging helpers which count how many operations are handled and compare
 * the count with the number of pointers in the operation structure.
 * the counter exists only when CONFIG_AUFS_DEBUG is enabled.
 */
#define DyDbgSize(cnt, op) AuDebugOn(cnt != sizeof(op)/sizeof(void *))
#ifdef CONFIG_AUFS_DEBUG
#define DyDbgDeclare(cnt) unsigned int cnt = 0
#define DyDbgInc(cnt) do { cnt++; } while (0)
#else
#define DyDbgDeclare(cnt) do {} while (0)
#define DyDbgInc(cnt) do {} while (0)
#endif
/*
 * copy src.func to dst.func only when the lower h_op has func too.
 * when the lower one has it but src does not, a debug message is printed and
 * dst.func is left untouched.
 * a local variable named cnt is expected in the caller.
 */
#define DySet(func, dst, src, h_op, h_sb) do { \
DyDbgInc(cnt); \
if (h_op->func) { \
if (src.func) \
dst.func = src.func; \
else \
AuDbg("%s %s\n", au_sbtype(h_sb), #func); \
} \
} while (0)
/* copy src.func to dst.func regardless of the lower operation */
#define DySetForce(func, dst, src) do { \
AuDebugOn(!src.func); \
DyDbgInc(cnt); \
dst.func = src.func; \
} while (0)
/*
 * the address_space_operations flavors of the two macros above.
 * local variables named dyaop, h_aop and h_sb are expected in the caller.
 */
#define DySetAop(func) \
DySet(func, dyaop->da_op, aufs_aop, h_aop, h_sb)
#define DySetAopForce(func) \
DySetForce(func, dyaop->da_op, aufs_aop)
/*
 * fill the address_space_operations in the struct au_dyaop which @key points
 * to, based on the lower operations @h_op.
 * every operation is set from aufs_aop only when the lower one is set (see
 * DySet()), except ->readpage which is always set.
 * DyDbgSize() at the end compares the number of the handled operations with
 * the number of pointers in the structure, so this list has to follow the
 * definition of struct address_space_operations.
 */
static void dy_aop(struct au_dykey *key, const void *h_op,
struct super_block *h_sb __maybe_unused)
{
struct au_dyaop *dyaop = (void *)key;
const struct address_space_operations *h_aop = h_op;
DyDbgDeclare(cnt);
AuDbg("%s\n", au_sbtype(h_sb));
DySetAop(writepage);
DySetAopForce(readpage); /* force */
DySetAop(writepages);
DySetAop(set_page_dirty);
DySetAop(readpages);
DySetAop(write_begin);
DySetAop(write_end);
DySetAop(bmap);
DySetAop(invalidatepage);
DySetAop(releasepage);
DySetAop(freepage);
/* this one will be changed according to an aufs mount option */
DySetAop(direct_IO);
DySetAop(migratepage);
DySetAop(isolate_page);
DySetAop(putback_page);
DySetAop(launder_page);
DySetAop(is_partially_uptodate);
DySetAop(is_dirty_writeback);
DySetAop(error_remove_page);
DySetAop(swap_activate);
DySetAop(swap_deactivate);
DyDbgSize(cnt, *h_aop);
}
/* ---------------------------------------------------------------------- */
/*
 * kref release function for the case where the count must not reach zero,
 * see dy_get()
 */
static void dy_bug(struct kref *kref)
{
BUG();
}
/*
 * get the key for the lower operation @op->dy_hop and register it to @br.
 * - when the global list has the key already, it is used with a new
 *   reference.
 * - otherwise a new key is allocated, filled by the type specific ->set()
 *   and added to the global list. when another key for the same operation is
 *   found by dy_gadd(), the new one is freed and the found one is used.
 * - then the key is stored to the branch local array by dy_bradd(). when the
 *   array had it already, the reference taken here is dropped.
 * returns the key, or ERR_PTR(-ENOMEM).
 */
static struct au_dykey *dy_get(struct au_dynop *op, struct au_branch *br)
{
struct au_dykey *key, *old;
struct hlist_bl_head *hbl;
/* the size of the key and the function to fill it, per type */
struct op {
unsigned int sz;
void (*set)(struct au_dykey *key, const void *h_op,
struct super_block *h_sb __maybe_unused);
};
static const struct op a[] = {
[AuDy_AOP] = {
.sz = sizeof(struct au_dyaop),
.set = dy_aop
}
};
const struct op *p;
hbl = dynop + op->dy_type;
key = dy_gfind_get(hbl, op->dy_hop);
if (key)
goto out_add; /* success */
p = a + op->dy_type;
key = kzalloc(p->sz, GFP_NOFS);
if (unlikely(!key)) {
key = ERR_PTR(-ENOMEM);
goto out;
}
key->dk_op.dy_hop = op->dy_hop;
kref_init(&key->dk_kref);
p->set(key, op->dy_hop, au_br_sb(br));
old = dy_gadd(hbl, key);
if (old) {
/* the new key was never added to the list */
au_kfree_rcu(key);
key = old;
}
out_add:
old = dy_bradd(br, key);
if (old)
/* its ref-count should never be zero here */
kref_put(&key->dk_kref, dy_bug);
out:
return key;
}
/* ---------------------------------------------------------------------- */
/*
* Aufs prohibits O_DIRECT by default even if the branch supports it.
* This behaviour is necessary to return an error from open(O_DIRECT) instead
* of the succeeding I/O. The dio mount option enables O_DIRECT and makes
* open(O_DIRECT) always succeed, but the succeeding I/O may return an error.
* See the aufs manual in detail.
*/
/* set ->direct_IO from aufs_aop when @do_dx is non-zero, clear it otherwise */
static void dy_adx(struct au_dyaop *dyaop, int do_dx)
{
	dyaop->da_op.direct_IO = do_dx ? aufs_aop.direct_IO : NULL;
}
/*
 * Get the struct au_dyaop for the lower operations @h_aop via dy_get() and
 * adjust its ->direct_IO according to @do_dx.
 * An error pointer from dy_get() is returned as it is.
 */
static struct au_dyaop *dy_aget(struct au_branch *br,
				const struct address_space_operations *h_aop,
				int do_dx)
{
	struct au_dynop op;
	struct au_dyaop *dyaop;

	op.dy_type = AuDy_AOP;
	op.dy_haop = h_aop;
	dyaop = (void *)dy_get(&op, br);
	if (!IS_ERR(dyaop))
		dy_adx(dyaop, do_dx);

	return dyaop;
}
/*
 * Make @inode use the dynamic address_space_operations which correspond to
 * the ones of the lower inode @h_inode on the branch @bindex.
 * ->direct_IO follows the DIO mount option.
 * Returns zero, or the error from dy_aget(); i_mapping->a_ops is left
 * untouched on error.
 */
int au_dy_iaop(struct inode *inode, aufs_bindex_t bindex,
	       struct inode *h_inode)
{
	int do_dx;
	struct super_block *sb;
	struct au_branch *br;
	struct au_dyaop *dyaop;

	AuDebugOn(!S_ISREG(h_inode->i_mode));
	IiMustWriteLock(inode);

	sb = inode->i_sb;
	br = au_sbr(sb, bindex);
	do_dx = !!au_opt_test(au_mntflags(sb), DIO);
	dyaop = dy_aget(br, h_inode->i_mapping->a_ops, do_dx);
	if (IS_ERR(dyaop))
		/* nothing to put back */
		return PTR_ERR(dyaop);

	inode->i_mapping->a_ops = &dyaop->da_op;
	return 0;
}
/*
* Is it safe to replace a_ops while the inode/file is in operation?
* Yes, I hope so.
*/
/*
 * Set a_ops of @inode again from its lower inode on the top branch.
 * Nothing is done, and zero is returned, unless @inode is a regular file.
 */
int au_dy_irefresh(struct inode *inode)
{
	aufs_bindex_t btop;
	struct inode *h_inode;

	if (!S_ISREG(inode->i_mode))
		return 0;

	btop = au_ibtop(inode);
	h_inode = au_h_iptr(inode, btop);
	return au_dy_iaop(inode, btop, h_inode);
}
void au_dy_arefresh(int do_dx)
{
struct hlist_bl_head *hbl;
struct hlist_bl_node *pos;
struct au_dykey *key;
hbl = dynop + AuDy_AOP;
hlist_bl_lock(hbl);
hlist_bl_for_each_entry(key, pos, hbl, dk_hnode)
dy_adx((void *)key, do_dx);
hlist_bl_unlock(hbl);
}
/* ---------------------------------------------------------------------- */
/* initialize every global list of keys */
void __init au_dy_init(void)
{
	struct hlist_bl_head *hbl;

	for (hbl = dynop; hbl < dynop + AuDyLast; hbl++)
		INIT_HLIST_BL_HEAD(hbl);
}
void au_dy_fin(void)
{
int i;
for (i = 0; i < AuDyLast; i++)
WARN_ON(!hlist_bl_empty(dynop + i));
}

77
fs/aufs/dynop.h Normal file
View File

@ -0,0 +1,77 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2010-2020 Junjiro R. Okajima
*
* This program, aufs is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* dynamically customizable operations (for regular files only)
*/
#ifndef __AUFS_DYNOP_H__
#define __AUFS_DYNOP_H__
#ifdef __KERNEL__
#include <linux/fs.h>
#include <linux/kref.h>
/*
 * the types of the dynamic operations. AuDyLast is the number of types and
 * is used as the length of the global list array in dynop.c
 */
enum {AuDy_AOP, AuDyLast};
/*
 * identifies lower operations: the type (AuDy_xxx) and the pointer to the
 * lower operation structure, either untyped or typed
 */
struct au_dynop {
int dy_type;
union {
const void *dy_hop;
const struct address_space_operations *dy_haop;
};
};
/*
 * the common head of every dynamic operation object.
 * the list node and the rcu head share the storage; dy_free() in dynop.c
 * removes the key from the list before it passes dk_rcu to call_rcu().
 */
struct au_dykey {
union {
struct hlist_bl_node dk_hnode;
struct rcu_head dk_rcu;
};
struct au_dynop dk_op;
/*
 * a reference is held while the key is in the branch local array, and it
 * is put when the branch is removed.
 */
struct kref dk_kref;
};
/* stop unioning since their sizes are very different from each other */
/* the key and the customized address_space_operations for it */
struct au_dyaop {
struct au_dykey da_key;
struct address_space_operations da_op; /* not const */
};
/*
 * da_key must be the first member since dynop.c converts between
 * 'struct au_dykey *' and 'struct au_dyaop *' by a plain cast
 */
static_assert(!offsetof(struct au_dyaop, da_key));
/* ---------------------------------------------------------------------- */
/* dynop.c */
struct au_branch;
void au_dy_put(struct au_dykey *key);
int au_dy_iaop(struct inode *inode, aufs_bindex_t bindex,
struct inode *h_inode);
int au_dy_irefresh(struct inode *inode);
/* the parameter is named do_dx in the definition in dynop.c */
void au_dy_arefresh(int do_dio);
void __init au_dy_init(void);
void au_dy_fin(void);
#endif /* __KERNEL__ */
#endif /* __AUFS_DYNOP_H__ */

838
fs/aufs/export.c Normal file
View File

@ -0,0 +1,838 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2005-2020 Junjiro R. Okajima
*
* This program, aufs is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* export via nfs
*/
#include <linux/exportfs.h>
#include <linux/fs_struct.h>
#include <linux/namei.h>
#include <linux/nsproxy.h>
#include <linux/random.h>
#include <linux/writeback.h>
#include "aufs.h"
/*
 * converts between ino_t and the 32bit words in a file handle.
 * two words are used when CONFIG_AUFS_INO_T_64 is enabled, a single one
 * otherwise. decode_ino() checks at build time that both members have the
 * same size.
 */
union conv {
#ifdef CONFIG_AUFS_INO_T_64
__u32 a[2];
#else
__u32 a[1];
#endif
ino_t ino;
};
/*
 * Build an inode number from the 32bit word(s) at @a; one word, or two when
 * CONFIG_AUFS_INO_T_64 is enabled (see union conv).
 */
static ino_t decode_ino(__u32 *a)
{
	union conv u;

	BUILD_BUG_ON(sizeof(u.ino) != sizeof(u.a));
	memcpy(u.a, a, sizeof(u.a));

	return u.ino;
}
/*
 * Store @ino into the 32bit word(s) at @a; one word, or two when
 * CONFIG_AUFS_INO_T_64 is enabled (see union conv).
 */
static void encode_ino(__u32 *a, ino_t ino)
{
	union conv u = {
		.ino = ino
	};

	memcpy(a, u.a, sizeof(u.a));
}
/* NFS file handle */
/*
 * the indices of the 32bit words in the file handle.
 * an inode number takes two words when CONFIG_AUFS_INO_T_64 is enabled, and
 * Fh_ino/Fh_dir_ino are the aliases of the first word of each.
 * the words from Fh_tail on are handed to exportfs_decode_fh() for the branch
 * fs together with the type stored at Fh_h_type, see decode_by_path().
 */
enum {
Fh_br_id,
Fh_sigen,
#ifdef CONFIG_AUFS_INO_T_64
/* support 64bit inode number */
Fh_ino1,
Fh_ino2,
Fh_dir_ino1,
Fh_dir_ino2,
#else
Fh_ino1,
Fh_dir_ino1,
#endif
Fh_igen,
Fh_h_type,
Fh_tail,
Fh_ino = Fh_ino1,
Fh_dir_ino = Fh_dir_ino1
};
/* return 1 when @dentry has DCACHE_DISCONNECTED set, 0 otherwise */
static int au_test_anon(struct dentry *dentry)
{
	/* note: d_flags is read without d_lock */
	return (dentry->d_flags & DCACHE_DISCONNECTED) != 0;
}
/*
 * Return non-zero when the current task is a kernel thread whose comm is
 * "nfsd", zero otherwise.
 */
int au_test_nfsd(void)
{
	struct task_struct *tsk = current;
	char comm[sizeof(tsk->comm)];

	if (!(tsk->flags & PF_KTHREAD))
		return 0;

	get_task_comm(comm, tsk);
	return !strcmp(comm, "nfsd");
}
/* ---------------------------------------------------------------------- */
/* inode generation external table */
/*
 * Write i_generation + 1 of @inode into the xigen file, at the offset
 * i_ino * sizeof(__u32). i_generation of @inode itself is not modified here.
 * An I/O error is printed for a short write; a negative return value from
 * xino_fwrite() is not reported by this function.
 */
void au_xigen_inc(struct inode *inode)
{
	loff_t pos;
	ssize_t sz;
	__u32 igen;
	struct super_block *sb = inode->i_sb;
	struct au_sbinfo *sbinfo;

	AuDebugOn(!au_opt_test(au_mntflags(sb), XINO));

	sbinfo = au_sbi(sb);
	pos = inode->i_ino;
	pos *= sizeof(igen);
	igen = inode->i_generation + 1;
	sz = xino_fwrite(sbinfo->si_xwrite, sbinfo->si_xigen, &igen,
			 sizeof(igen), &pos);
	if (unlikely(sz != sizeof(igen) && sz >= 0))
		AuIOErr("xigen error (%zd)\n", sz);
}
/*
 * Set up i_generation of @inode from the xigen file.
 * Nothing is done for the root inode or when the XINO option is off.
 * When the file is too short to hold the slot of this inode, a new generation
 * is taken from si_xigen_next and written to the slot; otherwise the stored
 * generation is read.
 * Returns zero, -EFBIG when the inode number is too large for the file
 * offset, -EIO for a short read/write, or the negative value returned by the
 * read/write.
 */
int au_xigen_new(struct inode *inode)
{
	loff_t pos;
	ssize_t sz;
	struct super_block *sb;
	struct au_sbinfo *sbinfo;
	struct file *file;

	/* todo: dirty, at mount time */
	if (inode->i_ino == AUFS_ROOT_INO)
		return 0;

	sb = inode->i_sb;
	SiMustAnyLock(sb);
	if (unlikely(!au_opt_test(au_mntflags(sb), XINO)))
		return 0;

	pos = inode->i_ino;
	if (unlikely(au_loff_max / sizeof(inode->i_generation) - 1 < pos)) {
		AuIOErr1("too large i%lld\n", pos);
		return -EFBIG;
	}
	pos *= sizeof(inode->i_generation);

	sbinfo = au_sbi(sb);
	file = sbinfo->si_xigen;
	BUG_ON(!file);

	if (vfsub_f_size_read(file)
	    < pos + sizeof(inode->i_generation)) {
		/* no slot yet, create it */
		inode->i_generation = atomic_inc_return(&sbinfo->si_xigen_next);
		sz = xino_fwrite(sbinfo->si_xwrite, file, &inode->i_generation,
				 sizeof(inode->i_generation), &pos);
	} else {
		sz = xino_fread(sbinfo->si_xread, file, &inode->i_generation,
				sizeof(inode->i_generation), &pos);
	}
	if (sz == sizeof(inode->i_generation))
		return 0; /* success */

	if (unlikely(sz >= 0)) {
		AuIOErr("xigen error (%zd)\n", sz);
		return -EIO;
	}
	return sz;
}
/*
 * Create a new xigen file via au_xino_create2() and install it into the
 * sbinfo of @sb; the previous file, if any, is released by fput().
 * Returns zero, or the error from au_xino_create2() in which case the current
 * file is kept.
 */
int au_xigen_set(struct super_block *sb, struct path *path)
{
	int err;
	struct au_sbinfo *sbinfo;
	struct file *file;

	SiMustWriteLock(sb);

	sbinfo = au_sbi(sb);
	file = au_xino_create2(sb, path, sbinfo->si_xigen);
	if (IS_ERR(file)) {
		err = PTR_ERR(file);
	} else {
		err = 0;
		if (sbinfo->si_xigen)
			fput(sbinfo->si_xigen);
		sbinfo->si_xigen = file;
	}

	AuTraceErr(err);
	return err;
}
/* release the xigen file of @sb, if any, and clear the pointer to it */
void au_xigen_clr(struct super_block *sb)
{
	struct au_sbinfo *sbinfo;

	SiMustWriteLock(sb);

	sbinfo = au_sbi(sb);
	if (!sbinfo->si_xigen)
		return;

	fput(sbinfo->si_xigen);
	sbinfo->si_xigen = NULL;
}
/* ---------------------------------------------------------------------- */
/*
 * find a dentry for the aufs inode @ino among the cached inodes.
 * - NULL is returned when the inode is not cached, when no suitable alias
 *   exists, or when the found alias fails au_digen_test() against the current
 *   generation.
 * - ERR_PTR(-ESTALE) is returned when the inode is bad, is a dead dir, or its
 *   generation differs from the one of the super block.
 * - when @dir_ino is zero or the inode is a dir, d_find_alias() is used.
 *   otherwise the aliases are walked to find the one which is not
 *   disconnected and whose parent has the inode number @dir_ino.
 * the returned dentry holds a reference.
 */
static struct dentry *decode_by_ino(struct super_block *sb, ino_t ino,
ino_t dir_ino)
{
struct dentry *dentry, *d;
struct inode *inode;
unsigned int sigen;
dentry = NULL;
inode = ilookup(sb, ino);
if (!inode)
goto out;
dentry = ERR_PTR(-ESTALE);
sigen = au_sigen(sb);
if (unlikely(au_is_bad_inode(inode)
|| IS_DEADDIR(inode)
|| sigen != au_iigen(inode, NULL)))
goto out_iput;
dentry = NULL;
if (!dir_ino || S_ISDIR(inode->i_mode))
dentry = d_find_alias(inode);
else {
/* i_lock is held during the walk, d_lock for each alias */
spin_lock(&inode->i_lock);
hlist_for_each_entry(d, &inode->i_dentry, d_u.d_alias) {
spin_lock(&d->d_lock);
if (!au_test_anon(d)
&& d_inode(d->d_parent)->i_ino == dir_ino) {
dentry = dget_dlock(d);
spin_unlock(&d->d_lock);
break;
}
spin_unlock(&d->d_lock);
}
spin_unlock(&inode->i_lock);
}
if (unlikely(dentry && au_digen_test(dentry, sigen))) {
/* need to refresh */
dput(dentry);
dentry = NULL;
}
out_iput:
iput(inode);
out:
AuTraceErrPtr(dentry);
return dentry;
}
/* ---------------------------------------------------------------------- */
/* todo: dirty? */
/* if exportfs_decode_fh() passed vfsmount*, we could be happy */
/* the argument of au_compare_mnt(), which is called via iterate_mounts() */
struct au_compare_mnt_args {
/* input */
struct super_block *sb;
/* output */
struct vfsmount *mnt;
};
/*
 * Callback for iterate_mounts(): when @mnt belongs to the wanted super block,
 * store it with a new reference and return 1. Return 0 otherwise.
 */
static int au_compare_mnt(struct vfsmount *mnt, void *arg)
{
	struct au_compare_mnt_args *a = arg;
	int match = (mnt->mnt_sb == a->sb);

	if (match)
		a->mnt = mntget(mnt);

	return match;
}
/*
 * find a mount of @sb by iterating the mounts from the root of the current
 * task. the mount is returned with the reference taken by au_compare_mnt().
 * a mount is expected to be found always (AuDebugOn).
 */
static struct vfsmount *au_mnt_get(struct super_block *sb)
{
int err;
struct path root;
struct au_compare_mnt_args args = {
.sb = sb
};
get_fs_root(current->fs, &root);
rcu_read_lock();
err = iterate_mounts(au_compare_mnt, &args, root.mnt);
rcu_read_unlock();
path_put(&root);
AuDebugOn(!err);
AuDebugOn(!args.mnt);
return args.mnt;
}
/*
 * the argument of si_nfsd_read_lock().
 * - sigen, br_id: input, taken from the file handle
 * - bindex: output, the branch index for br_id or -1
 * - force_lock: when set, the lock is kept even if the test fails
 */
struct au_nfsd_si_lock {
unsigned int sigen;
aufs_bindex_t bindex, br_id;
unsigned char force_lock;
};
/*
 * read-lock the super block and test whether the branch id and the generation
 * in @nsi_lock are still acceptable.
 * - success: returns zero with the lock held, nsi_lock->bindex is set to the
 *   branch index.
 * - failure: returns -ESTALE and nsi_lock->bindex is set to -1. the lock is
 *   released unless nsi_lock->force_lock is set.
 */
static int si_nfsd_read_lock(struct super_block *sb,
struct au_nfsd_si_lock *nsi_lock)
{
int err;
aufs_bindex_t bindex;
si_read_lock(sb, AuLock_FLUSH);
/* branch id may be wrapped around */
err = 0;
bindex = au_br_index(sb, nsi_lock->br_id);
if (bindex >= 0 && nsi_lock->sigen + AUFS_BRANCH_MAX > au_sigen(sb))
goto out; /* success */
err = -ESTALE;
bindex = -1;
if (!nsi_lock->force_lock)
si_read_unlock(sb);
out:
nsi_lock->bindex = bindex;
return err;
}
/*
 * the context of the find_name_by_ino() actor.
 * - called: incremented for every entry passed to the actor
 * - found: set when an entry whose inode number is @ino is met
 * - name, namelen: the buffer for the found name, and its length
 */
struct find_name_by_ino {
struct dir_context ctx;
int called, found;
ino_t ino;
char *name;
int namelen;
};
/*
 * The actor for iterating a dir: count the call, and when the inode number of
 * the entry is the wanted one, copy its name (without a terminating NUL) and
 * the length into the context and return 1. Return 0 for the other entries.
 */
static int
find_name_by_ino(struct dir_context *ctx, const char *name, int namelen,
		 loff_t offset, u64 ino, unsigned int d_type)
{
	struct find_name_by_ino *a = container_of(ctx, struct find_name_by_ino,
						  ctx);

	a->called++;
	if (a->ino == ino) {
		memcpy(a->name, name, namelen);
		a->namelen = namelen;
		a->found = 1;
		return 1;
	}

	return 0;
}
/*
 * Find the child of @path->dentry whose inode number is @ino, by reading the
 * dir and then looking up the found name.
 *
 * When @nsi_lock is given, the read-lock of the super block is released
 * before the dir is opened and is acquired again by si_nfsd_read_lock()
 * before returning; when that fails, a successfully found dentry is put and
 * ERR_PTR(-ESTALE) is returned instead.
 *
 * Returns the dentry with a reference, or an error pointer:
 * - the error from opening or reading the dir, or from the lookup
 * - -ENOMEM when the page for the name cannot be allocated
 * - -ESTALE (instead of ENOENT) when no entry has the inode number @ino
 * - -ENOENT when the lookup result is negative
 */
static struct dentry *au_lkup_by_ino(struct path *path, ino_t ino,
				     struct au_nfsd_si_lock *nsi_lock)
{
	struct dentry *dentry, *parent;
	struct file *file;
	struct find_name_by_ino arg = {
		.ctx = {
			.actor = find_name_by_ino
		}
	};
	int err;

	parent = path->dentry;
	if (nsi_lock)
		si_read_unlock(parent->d_sb);
	file = vfsub_dentry_open(path, au_dir_roflags);
	dentry = (void *)file;
	if (IS_ERR(file))
		goto out;

	dentry = ERR_PTR(-ENOMEM);
	arg.name = (void *)__get_free_page(GFP_NOFS);
	if (unlikely(!arg.name))
		goto out_file;
	arg.ino = ino;
	arg.found = 0;
	/* read on until found, an error, or no entry is passed to the actor */
	do {
		arg.called = 0;
		/* smp_mb(); */
		err = vfsub_iterate_dir(file, &arg.ctx);
	} while (!err && !arg.found && arg.called);
	dentry = ERR_PTR(err);
	if (unlikely(err))
		goto out_name;
	/* instead of ENOENT */
	dentry = ERR_PTR(-ESTALE);
	if (!arg.found)
		goto out_name;

	/* do not call vfsub_lkup_one() */
	dentry = vfsub_lookup_one_len_unlocked(arg.name, parent, arg.namelen);
	AuTraceErrPtr(dentry);
	if (IS_ERR(dentry))
		goto out_name;
	AuDebugOn(au_test_anon(dentry));
	if (unlikely(d_really_is_negative(dentry))) {
		dput(dentry);
		dentry = ERR_PTR(-ENOENT);
	}

out_name:
	free_page((unsigned long)arg.name);
out_file:
	fput(file);
out:
	if (unlikely(nsi_lock
		     && si_nfsd_read_lock(parent->d_sb, nsi_lock) < 0)) {
		if (!IS_ERR(dentry)) {
			dput(dentry);
			dentry = ERR_PTR(-ESTALE);
		}
	}
	AuTraceErrPtr(dentry);
	return dentry;
}
/*
 * Find the dentry for @ino through its parent dir @dir_ino.
 * The parent is the root dentry when @dir_ino is AUFS_ROOT_INO, otherwise it
 * is searched by decode_by_ino(); when that gives NULL or an error, the same
 * value is returned. With the parent in hand, the child is searched by
 * au_lkup_by_ino().
 */
static struct dentry *decode_by_dir_ino(struct super_block *sb, ino_t ino,
					ino_t dir_ino,
					struct au_nfsd_si_lock *nsi_lock)
{
	struct dentry *dentry;
	struct path path;

	if (dir_ino == AUFS_ROOT_INO) {
		path.dentry = dget(sb->s_root);
	} else {
		path.dentry = decode_by_ino(sb, dir_ino, 0);
		dentry = path.dentry;
		if (!path.dentry || IS_ERR(path.dentry))
			goto out;
		AuDebugOn(au_test_anon(path.dentry));
	}

	path.mnt = au_mnt_get(sb);
	dentry = au_lkup_by_ino(&path, ino, nsi_lock);
	path_put(&path);

out:
	AuTraceErrPtr(dentry);
	return dentry;
}
/* ---------------------------------------------------------------------- */
/*
 * the 'acceptable' callback given to exportfs_decode_fh() in
 * decode_by_path(), every dentry is accepted
 */
static int h_acceptable(void *expv, struct dentry *dentry)
{
return 1;
}
/*
 * build a path string in @buf (@len bytes) from three d_path() calls:
 * the branch root @h_rootpath, @h_parent on the same mount, and the aufs root
 * of @sb.
 * NOTE(review): the intent looks like "the path of the aufs mount point
 * followed by the path of @h_parent relative to the branch root"; the result
 * relies on d_path() filling the buffer from its end -- confirm before
 * touching the pointer arithmetic.
 * returns the pointer to the string in @buf, or the error pointer from
 * d_path().
 */
static char *au_build_path(struct dentry *h_parent, struct path *h_rootpath,
char *buf, int len, struct super_block *sb)
{
char *p;
int n;
struct path path;
/* only the length of the branch root path is used */
p = d_path(h_rootpath, buf, len);
if (IS_ERR(p))
goto out;
n = strlen(p);
path.mnt = h_rootpath->mnt;
path.dentry = h_parent;
p = d_path(&path, buf, len);
if (IS_ERR(p))
goto out;
/* skip the branch root part unless its path is a single char */
if (n != 1)
p += n;
path.mnt = au_mnt_get(sb);
path.dentry = sb->s_root;
/* the buffer is shortened by the length of the remaining string */
p = d_path(&path, buf, len - strlen(p));
mntput(path.mnt);
if (IS_ERR(p))
goto out;
/* replace the terminating NUL of the latest string by a slash */
if (n != 1)
p[strlen(p)] = '/';
out:
AuTraceErrPtr(p);
return p;
}
/*
 * the last resort to find the dentry for @ino: decode the branch part of the
 * file handle (the words from Fh_tail on) to get the parent dir on the
 * branch, build its path under the aufs mount by au_build_path(), and look
 * the path up.
 * when the looked-up dir itself is not @ino, the child is searched by
 * au_lkup_by_ino().
 * the read-lock of the super block is released before the path lookup and is
 * acquired again by si_nfsd_read_lock(); when that fails, a found dentry is
 * put and ERR_PTR(-ESTALE) is returned.
 * returns the dentry, an error pointer, or NULL when the branch fs returned
 * NULL or a disconnected dentry.
 */
static
struct dentry *decode_by_path(struct super_block *sb, ino_t ino, __u32 *fh,
int fh_len, struct au_nfsd_si_lock *nsi_lock)
{
struct dentry *dentry, *h_parent, *root;
struct super_block *h_sb;
char *pathname, *p;
struct vfsmount *h_mnt;
struct au_branch *br;
int err;
struct path path;
br = au_sbr(sb, nsi_lock->bindex);
h_mnt = au_br_mnt(br);
h_sb = h_mnt->mnt_sb;
/* todo: call lower fh_to_dentry()? fh_to_parent()? */
lockdep_off();
h_parent = exportfs_decode_fh(h_mnt, (void *)(fh + Fh_tail),
fh_len - Fh_tail, fh[Fh_h_type],
h_acceptable, /*context*/NULL);
lockdep_on();
dentry = h_parent;
if (unlikely(!h_parent || IS_ERR(h_parent))) {
AuWarn1("%s decode_fh failed, %ld\n",
au_sbtype(h_sb), PTR_ERR(h_parent));
goto out;
}
dentry = NULL;
if (unlikely(au_test_anon(h_parent))) {
AuWarn1("%s decode_fh returned a disconnected dentry\n",
au_sbtype(h_sb));
goto out_h_parent;
}
dentry = ERR_PTR(-ENOMEM);
pathname = (void *)__get_free_page(GFP_NOFS);
if (unlikely(!pathname))
goto out_h_parent;
root = sb->s_root;
path.mnt = h_mnt;
di_read_lock_parent(root, !AuLock_IR);
path.dentry = au_h_dptr(root, nsi_lock->bindex);
di_read_unlock(root, !AuLock_IR);
p = au_build_path(h_parent, &path, pathname, PAGE_SIZE, sb);
dentry = (void *)p;
if (IS_ERR(p))
goto out_pathname;
/* the lock is taken again at out_relock */
si_read_unlock(sb);
err = vfsub_kern_path(p, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &path);
dentry = ERR_PTR(err);
if (unlikely(err))
goto out_relock;
dentry = ERR_PTR(-ENOENT);
AuDebugOn(au_test_anon(path.dentry));
if (unlikely(d_really_is_negative(path.dentry)))
goto out_path;
if (ino != d_inode(path.dentry)->i_ino)
dentry = au_lkup_by_ino(&path, ino, /*nsi_lock*/NULL);
else
dentry = dget(path.dentry);
out_path:
path_put(&path);
out_relock:
if (unlikely(si_nfsd_read_lock(sb, nsi_lock) < 0))
if (!IS_ERR(dentry)) {
dput(dentry);
dentry = ERR_PTR(-ESTALE);
}
out_pathname:
free_page((unsigned long)pathname);
out_h_parent:
dput(h_parent);
out:
AuTraceErrPtr(dentry);
return dentry;
}
/* ---------------------------------------------------------------------- */
/*
 * convert a file handle to a dentry. three ways are tried in this order:
 * 1. decode_by_ino(): the inode is still cached
 * 2. decode_by_dir_ino(): the parent dir is cached
 * 3. decode_by_path(): decode the branch part of the handle and look up the
 *    path
 * the found dentry is accepted only when it passes au_digen_test() and the
 * generation of its inode equals the one in the handle; otherwise
 * ERR_PTR(-ESTALE) is returned.
 * br_nfiles of the branch is incremented during the 2nd and the 3rd ways.
 * NULL may be returned when decode_by_path() returns NULL.
 */
static struct dentry *
aufs_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len,
int fh_type)
{
struct dentry *dentry;
__u32 *fh = fid->raw;
struct au_branch *br;
ino_t ino, dir_ino;
struct au_nfsd_si_lock nsi_lock = {
.force_lock = 0
};
dentry = ERR_PTR(-ESTALE);
/* it should never happen, but the file handle is unreliable */
if (unlikely(fh_len < Fh_tail))
goto out;
nsi_lock.sigen = fh[Fh_sigen];
nsi_lock.br_id = fh[Fh_br_id];
/* branch id may be wrapped around */
br = NULL;
if (unlikely(si_nfsd_read_lock(sb, &nsi_lock)))
goto out;
/* from now on, the lock is kept even if the re-lock test fails */
nsi_lock.force_lock = 1;
/* is this inode still cached? */
ino = decode_ino(fh + Fh_ino);
/* it should never happen */
if (unlikely(ino == AUFS_ROOT_INO))
goto out_unlock;
dir_ino = decode_ino(fh + Fh_dir_ino);
dentry = decode_by_ino(sb, ino, dir_ino);
if (IS_ERR(dentry))
goto out_unlock;
if (dentry)
goto accept;
/* is the parent dir cached? */
br = au_sbr(sb, nsi_lock.bindex);
au_lcnt_inc(&br->br_nfiles);
dentry = decode_by_dir_ino(sb, ino, dir_ino, &nsi_lock);
if (IS_ERR(dentry))
goto out_unlock;
if (dentry)
goto accept;
/* lookup path */
dentry = decode_by_path(sb, ino, fh, fh_len, &nsi_lock);
if (IS_ERR(dentry))
goto out_unlock;
if (unlikely(!dentry))
/* todo?: make it ESTALE */
goto out_unlock;
accept:
if (!au_digen_test(dentry, au_sigen(sb))
&& d_inode(dentry)->i_generation == fh[Fh_igen])
goto out_unlock; /* success */
dput(dentry);
dentry = ERR_PTR(-ESTALE);
out_unlock:
if (br)
au_lcnt_dec(&br->br_nfiles);
si_read_unlock(sb);
out:
AuTraceErrPtr(dentry);
return dentry;
}
#if 0 /* reserved for future use */
/* support subtreecheck option */
/*
 * Disabled sketch of ->fh_to_parent(): decode the parent directory of a file
 * handle, first by inode number and then by path.
 * NOTE(review): the decode_by_ino()/decode_by_path() calls below do not take
 * the same arguments as the calls in aufs_fh_to_dentry(); this code would
 * presumably need updating before it could be enabled.
 */
static struct dentry *aufs_fh_to_parent(struct super_block *sb, struct fid *fid,
int fh_len, int fh_type)
{
struct dentry *parent;
__u32 *fh = fid->raw;
ino_t dir_ino;
dir_ino = decode_ino(fh + Fh_dir_ino);
parent = decode_by_ino(sb, dir_ino, 0);
if (IS_ERR(parent))
goto out;
if (!parent)
parent = decode_by_path(sb, au_br_index(sb, fh[Fh_br_id]),
dir_ino, fh, fh_len);
out:
AuTraceErrPtr(parent);
return parent;
}
#endif
/* ---------------------------------------------------------------------- */
/*
 * Encode an aufs file handle for @inode into @fh (export_operations
 * ->encode_fh).
 *
 * The aufs part of the handle (branch id, sigen, inode number, parent dir
 * inode number, inode generation) occupies the first Fh_tail words; the
 * handle of the parent dir on the branch, produced by exportfs_encode_fh(),
 * follows it.
 *
 * @max_len: in: room in @fh; out: adjusted by exportfs_encode_fh() for the
 *	branch part, with Fh_tail added back.
 * @dir: parent directory, or NULL to derive it from an alias of @inode.
 *
 * Returns FILEID_ROOT for the aufs root, 99 on success, and FILEID_INVALID
 * on any failure.
 */
static int aufs_encode_fh(struct inode *inode, __u32 *fh, int *max_len,
			  struct inode *dir)
{
	int err;
	aufs_bindex_t bindex;
	struct super_block *sb, *h_sb;
	struct dentry *dentry, *parent, *h_parent;
	struct inode *h_dir;
	struct au_branch *br;

	err = -ENOSPC;
	if (unlikely(*max_len <= Fh_tail)) {
		AuWarn1("NFSv2 client (max_len %d)?\n", *max_len);
		goto out;
	}

	err = FILEID_ROOT;
	if (inode->i_ino == AUFS_ROOT_INO) {
		AuDebugOn(inode->i_ino != AUFS_ROOT_INO);
		goto out;
	}

	h_parent = NULL;
	sb = inode->i_sb;
	err = si_read_lock(sb, AuLock_FLUSH);
	if (unlikely(err))
		goto out;

#ifdef CONFIG_AUFS_DEBUG
	if (unlikely(!au_opt_test(au_mntflags(sb), XINO)))
		AuWarn1("NFS-exporting requires xino\n");
#endif
	err = -EIO;
	parent = NULL;
	ii_read_lock_child(inode);
	bindex = au_ibtop(inode);
	if (!dir) {
		dentry = d_find_any_alias(inode);
		if (unlikely(!dentry))
			goto out_unlock;
		AuDebugOn(au_test_anon(dentry));
		parent = dget_parent(dentry);
		dput(dentry);
		if (unlikely(!parent))
			goto out_unlock;
		/*
		 * a negative parent gives us no directory inode; bail out
		 * with -EIO instead of dereferencing a NULL dir below.
		 */
		if (unlikely(!d_really_is_positive(parent)))
			goto out_parent;
		dir = d_inode(parent);
	}

	ii_read_lock_parent(dir);
	h_dir = au_h_iptr(dir, bindex);
	ii_read_unlock(dir);
	if (unlikely(!h_dir))
		goto out_parent;
	h_parent = d_find_any_alias(h_dir);
	if (unlikely(!h_parent))
		goto out_hparent;

	err = -EPERM;
	br = au_sbr(sb, bindex);
	h_sb = au_br_sb(br);
	if (unlikely(!h_sb->s_export_op)) {
		AuErr1("%s branch is not exportable\n", au_sbtype(h_sb));
		goto out_hparent;
	}

	/* the aufs part of the handle */
	fh[Fh_br_id] = br->br_id;
	fh[Fh_sigen] = au_sigen(sb);
	encode_ino(fh + Fh_ino, inode->i_ino);
	encode_ino(fh + Fh_dir_ino, dir->i_ino);
	fh[Fh_igen] = inode->i_generation;

	/* the branch part is appended after the first Fh_tail words */
	*max_len -= Fh_tail;
	fh[Fh_h_type] = exportfs_encode_fh(h_parent, (void *)(fh + Fh_tail),
					   max_len,
					   /*connectable or subtreecheck*/0);
	err = fh[Fh_h_type];
	*max_len += Fh_tail;
	/* todo: macros? */
	if (err != FILEID_INVALID)
		err = 99;
	else
		AuWarn1("%s encode_fh failed\n", au_sbtype(h_sb));

out_hparent:
	dput(h_parent);
out_parent:
	dput(parent);
out_unlock:
	ii_read_unlock(inode);
	si_read_unlock(sb);
out:
	if (unlikely(err < 0))
		err = FILEID_INVALID;
	return err;
}
/* ---------------------------------------------------------------------- */
/*
 * export_operations ->commit_metadata: commit the metadata of the inode on
 * the top branch, using the branch's own ->commit_metadata() when it has one
 * and sync_inode() otherwise, then refresh the aufs inode's times and sizes.
 */
static int aufs_commit_metadata(struct inode *inode)
{
	int ret;
	struct super_block *sb = inode->i_sb;
	struct inode *h_inode;
	aufs_bindex_t btop;
	int (*commit)(struct inode *inode);

	si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
	ii_write_lock_child(inode);

	btop = au_ibtop(inode);
	AuDebugOn(btop < 0);
	h_inode = au_h_iptr(inode, btop);

	commit = h_inode->i_sb->s_export_op->commit_metadata;
	if (!commit) {
		struct writeback_control wbc = {
			.sync_mode	= WB_SYNC_ALL,
			.nr_to_write	= 0 /* metadata only */
		};

		ret = sync_inode(h_inode, &wbc);
	} else {
		ret = commit(h_inode);
	}

	au_cpup_attr_timesizes(inode);
	ii_write_unlock(inode);
	si_read_unlock(sb);
	return ret;
}
/* ---------------------------------------------------------------------- */
/* export operations assigned to sb->s_export_op by au_export_init() */
static struct export_operations aufs_export_op = {
.fh_to_dentry = aufs_fh_to_dentry,
/* aufs_fh_to_parent() is compiled out (#if 0), so it is not wired up */
/* .fh_to_parent = aufs_fh_to_parent, */
.encode_fh = aufs_encode_fh,
.commit_metadata = aufs_commit_metadata
};
/*
 * Set up NFS export support for @sb: install aufs_export_op and seed
 * si_xigen_next with a random 32-bit value.
 */
void au_export_init(struct super_block *sb)
{
	__u32 seed;
	struct au_sbinfo *sbinfo;

	/* compile-time sanity checks */
	BUILD_BUG_ON(sizeof(seed) != sizeof(int));
	BUILD_BUG_ON_MSG(IS_BUILTIN(CONFIG_AUFS_FS)
			 && IS_MODULE(CONFIG_EXPORTFS),
			 AUFS_NAME ": unsupported configuration "
			 "CONFIG_EXPORTFS=m and CONFIG_AUFS_FS=y");

	sb->s_export_op = &aufs_export_op;

	sbinfo = au_sbi(sb);
	sbinfo->si_xigen = NULL;
	get_random_bytes(&seed, sizeof(seed));
	atomic_set(&sbinfo->si_xigen_next, seed);
}

819
fs/aufs/f_op.c Normal file
View File

@ -0,0 +1,819 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2005-2020 Junjiro R. Okajima
*
* This program, aufs is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* file and vm operations
*/
#include <linux/aio.h>
#include <linux/fs_stack.h>
#include <linux/mman.h>
#include <linux/security.h>
#include "aufs.h"
/*
 * Open (or adopt) the lower file on the top branch for a non-directory aufs
 * file and record it in the file's au_finfo.
 *
 * @h_file: an already opened lower file to adopt, or NULL to open one via
 *	au_h_open().
 *
 * Returns 0 on success, otherwise the error from vfsub_test_mntns() or
 * au_h_open(). The caller must hold the fi write lock (FiMustWriteLock).
 */
int au_do_open_nondir(struct file *file, int flags, struct file *h_file)
{
	int err;
	aufs_bindex_t btop;
	struct dentry *dentry, *h_dentry;
	struct au_finfo *finfo;
	struct inode *h_inode;

	FiMustWriteLock(file);

	dentry = file->f_path.dentry;
	AuDebugOn(IS_ERR_OR_NULL(dentry));
	finfo = au_fi(file);
	memset(&finfo->fi_htop, 0, sizeof(finfo->fi_htop));
	atomic_set(&finfo->fi_mmapped, 0);
	btop = au_dbtop(dentry);

	/* pick the lower dentry, from the given lower file when there is one */
	if (h_file)
		h_dentry = h_file->f_path.dentry;
	else
		h_dentry = au_h_dptr(dentry, btop);
	err = vfsub_test_mntns(file->f_path.mnt, h_dentry->d_sb);
	if (unlikely(err))
		return err;

	if (!h_file) {
		h_file = au_h_open(dentry, btop, flags, file, /*force_wr*/0);
		if (IS_ERR(h_file))
			return PTR_ERR(h_file);
	}
	/* otherwise br ref is already inc-ed */

	if ((flags & __O_TMPFILE) && !(flags & O_EXCL)) {
		h_inode = file_inode(h_file);
		spin_lock(&h_inode->i_lock);
		h_inode->i_state |= I_LINKABLE;
		spin_unlock(&h_inode->i_lock);
	}
	au_set_fbtop(file, btop);
	au_set_h_fptr(file, btop, h_file);
	au_update_figen(file);
	/* todo: necessary? */
	/* file->f_ra = h_file->f_ra; */

	return 0;
}
/*
 * ->open() for non-directories: run au_do_open() with au_do_open_nondir as
 * the opener while holding si_rwsem for read.
 */
static int aufs_open_nondir(struct inode *inode __maybe_unused,
			    struct file *file)
{
	int ret;
	struct super_block *sb = file->f_path.dentry->d_sb;
	struct au_do_open_args open_args = {
		.open	= au_do_open_nondir
	};

	AuDbg("%pD, f_flags 0x%x, f_mode 0x%x\n",
	      file, vfsub_file_flags(file), file->f_mode);

	si_read_lock(sb, AuLock_FLUSH);
	ret = au_do_open(file, &open_args);
	si_read_unlock(sb);

	return ret;
}
/*
 * ->release() for non-directories: unlink the file from the superblock's
 * si_files list, drop the lower file at fi_btop (if any) and finalize the
 * au_finfo. Always returns 0.
 */
int aufs_release_nondir(struct inode *inode __maybe_unused, struct file *file)
{
	struct au_finfo *finfo = au_fi(file);
	struct au_sbinfo *sbinfo = au_sbi(file->f_path.dentry->d_sb);
	aufs_bindex_t btop;

	au_hbl_del(&finfo->fi_hlist, &sbinfo->si_files);

	btop = finfo->fi_btop;
	if (btop >= 0)
		au_set_h_fptr(file, btop, NULL);

	au_finfo_fin(file);
	return 0;
}
/* ---------------------------------------------------------------------- */
/*
 * Flush the top lower file of @file, if there is one.
 * Returns 0 when there is no lower file, else the vfsub_flush() result.
 */
static int au_do_flush_nondir(struct file *file, fl_owner_t id)
{
	struct file *h_file = au_hf_top(file);

	if (!h_file)
		return 0;
	return vfsub_flush(h_file, id);
}
/* ->flush() for non-directories: au_do_flush() with the nondir callback */
static int aufs_flush_nondir(struct file *file, fl_owner_t id)
{
	int err;

	err = au_do_flush(file, id, au_do_flush_nondir);
	return err;
}
/* ---------------------------------------------------------------------- */
/*
* read and write functions acquire [fdi]_rwsem once, but release them before
* acquiring mmap_sem. This is to prevent a race condition with mmap(2).
* Releasing these aufs rwsems should be safe: no branch management can happen
* (si_rwsem is kept) and no harmful copy-up should happen. Actually copy-up
* may happen in read functions after [fdi]_rwsem are released, but it should
* be harmless.
*/
/*
 * Revalidate @file and return its top lower file with an extra reference.
 *
 * The dentry info lock is released before returning. The file info read lock
 * is released too unless @keep_fi is set, in which case the caller has to
 * call fi_read_unlock() itself.
 *
 * Returns an ERR_PTR() when au_reval_and_lock_fdi() fails.
 * Callers should call au_read_post() or fput() in the end.
 */
struct file *au_read_pre(struct file *file, int keep_fi, unsigned int lsc)
{
	int err;
	struct file *h_file;

	err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/0, lsc);
	if (err)
		return ERR_PTR(err);

	di_read_unlock(file->f_path.dentry, AuLock_IR);
	h_file = au_hf_top(file);
	get_file(h_file);
	if (!keep_fi)
		fi_read_unlock(file);

	return h_file;
}
/*
 * Counterpart of au_read_pre(): copy atime from the lower inode to @inode
 * and drop the reference on @h_file.
 */
static void au_read_post(struct inode *inode, struct file *h_file)
{
	struct inode *h_inode = file_inode(h_file);

	/* update without lock, I don't think it a problem */
	fsstack_copy_attr_atime(inode, h_inode);
	fput(h_file);
}
/* arguments exchanged between au_write_pre() and au_write_post() */
struct au_write_pre {
/* input */
/* lock subclass handed to au_reval_and_lock_fdi() */
unsigned int lsc;
/* output */
/* i_blocks of the lower inode, sampled by au_write_pre() */
blkcnt_t blks;
/* top branch index of the file, as returned by au_fbtop() */
aufs_bindex_t btop;
};
/*
 * Prepare the top lower file of @file for a write-type operation.
 *
 * @do_ready: when non-zero, au_ready_to_write() is called first and its pin
 *	is released with au_unpin() once the lower file has been fetched.
 * @wpre: optional; supplies the lock subclass (->lsc) and receives the top
 *	branch index (->btop) and the lower inode's i_blocks (->blks).
 *
 * Returns the lower file with an extra reference, or an ERR_PTR().
 *
 * return with iinfo is write-locked
 * callers should call au_write_post() or iinfo_write_unlock() + fput() in the
 * end
 */
static struct file *au_write_pre(struct file *file, int do_ready,
struct au_write_pre *wpre)
{
struct file *h_file;
struct dentry *dentry;
int err;
unsigned int lsc;
struct au_pin pin;
lsc = 0;
if (wpre)
lsc = wpre->lsc;
err = au_reval_and_lock_fdi(file, au_reopen_nondir, /*wlock*/1, lsc);
h_file = ERR_PTR(err);
if (unlikely(err))
goto out;
dentry = file->f_path.dentry;
if (do_ready) {
err = au_ready_to_write(file, -1, &pin);
if (unlikely(err)) {
h_file = ERR_PTR(err);
di_write_unlock(dentry);
goto out_fi;
}
}
/* dinfo: write lock -> read lock, released below via di_read_unlock() */
di_downgrade_lock(dentry, /*flags*/0);
if (wpre)
wpre->btop = au_fbtop(file);
h_file = au_hf_top(file);
get_file(h_file);
if (wpre)
wpre->blks = file_inode(h_file)->i_blocks;
if (do_ready)
au_unpin(&pin);
di_read_unlock(dentry, /*flags*/0);
out_fi:
fi_write_unlock(file);
out:
return h_file;
}
/*
 * Counterpart of au_write_pre(): refresh the aufs inode's times, sizes and
 * mode from the lower inode, release the iinfo write lock, notify FHSM when
 * something was written, and drop the reference on @h_file.
 *
 * @wpre: must be the non-NULL structure filled by au_write_pre(); it is
 *	dereferenced unconditionally.
 * @written: au_fhsm_wrote() is called only when this is positive.
 */
static void au_write_post(struct inode *inode, struct file *h_file,
struct au_write_pre *wpre, ssize_t written)
{
struct inode *h_inode;
au_cpup_attr_timesizes(inode);
AuDebugOn(au_ibtop(inode) != wpre->btop);
h_inode = file_inode(h_file);
inode->i_mode = h_inode->i_mode;
ii_write_unlock(inode);
/* AuDbg("blks %llu, %llu\n", (u64)blks, (u64)h_inode->i_blocks); */
/* force the FHSM update when the lower inode's block count has grown */
if (written > 0)
au_fhsm_wrote(inode->i_sb, wpre->btop,
/*force*/h_inode->i_blocks > wpre->blks);
fput(h_file);
}
/*
 * ->read(): read from the top lower file via vfsub_read_u() while holding
 * si_rwsem for read.
 */
static ssize_t aufs_read(struct file *file, char __user *buf, size_t count,
			 loff_t *ppos)
{
	ssize_t ret;
	struct inode *inode = file_inode(file);
	struct super_block *sb = inode->i_sb;
	struct file *h_file;

	si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);

	h_file = au_read_pre(file, /*keep_fi*/0, /*lsc*/0);
	if (IS_ERR(h_file)) {
		ret = PTR_ERR(h_file);
	} else {
		/* filedata may be obsoleted by concurrent copyup, but no problem */
		ret = vfsub_read_u(h_file, buf, count, ppos);
		/* todo: necessary? */
		/* file->f_ra = h_file->f_ra; */
		au_read_post(inode, h_file);
	}

	si_read_unlock(sb);
	return ret;
}
/*
 * todo: very ugly
 * Acquire both the inode lock and si_rwsem (for read) safely.
 * When si_read_lock() with AuLock_NOPLM fails, the inode lock is dropped,
 * si_rwsem is taken and released once with AuLock_NOPLMW, and the whole
 * sequence is retried.
 * If the plink maintenance mode continues forever (that is the problem),
 * this may loop forever.
 */
static void au_mtx_and_read_lock(struct inode *inode)
{
	struct super_block *sb = inode->i_sb;

	for (;;) {
		inode_lock(inode);
		if (!si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM))
			return;

		/* back off and retry from the inode lock */
		inode_unlock(inode);
		si_read_lock(sb, AuLock_NOPLMW);
		si_read_unlock(sb);
	}
}
/*
 * ->write(): write to the top lower file via vfsub_write_u() with the inode
 * lock and si_rwsem (read) held.
 */
static ssize_t aufs_write(struct file *file, const char __user *ubuf,
			  size_t count, loff_t *ppos)
{
	ssize_t ret;
	struct au_write_pre wpre;
	struct inode *inode = file_inode(file);
	struct file *h_file;
	char __user *buf = (char __user *)ubuf;

	au_mtx_and_read_lock(inode);

	wpre.lsc = 0;
	h_file = au_write_pre(file, /*do_ready*/1, &wpre);
	if (!IS_ERR(h_file)) {
		ret = vfsub_write_u(h_file, buf, count, ppos);
		au_write_post(inode, h_file, &wpre, ret);
	} else {
		ret = PTR_ERR(h_file);
	}

	si_read_unlock(inode->i_sb);
	inode_unlock(inode);
	return ret;
}
/*
 * Call ->read_iter() or ->write_iter() of the lower file @h_file, selected
 * by @rw (MAY_READ or MAY_WRITE), with kio->ki_filp temporarily pointing to
 * the lower file.
 *
 * Returns the security_file_permission() error, the result of the lower
 * method, or -ENOSYS when the lower file has no such method.
 */
static ssize_t au_do_iter(struct file *h_file, int rw, struct kiocb *kio,
			  struct iov_iter *iov_iter)
{
	ssize_t ret;
	struct file *saved_filp;
	ssize_t (*iter)(struct kiocb *, struct iov_iter *);

	ret = security_file_permission(h_file, rw);
	if (unlikely(ret))
		return ret;

	switch (rw) {
	case MAY_READ:
		iter = h_file->f_op->read_iter;
		break;
	case MAY_WRITE:
		iter = h_file->f_op->write_iter;
		break;
	default:
		iter = NULL;
		break;
	}

	saved_filp = kio->ki_filp;
	kio->ki_filp = h_file;
	if (!iter) {
		/* currently there is no such fs */
		WARN_ON_ONCE(1);
		/* the branch doesn't have its ->(read|write)_iter() */
		ret = -ENOSYS;
	} else {
		lockdep_off();
		ret = iter(kio, iov_iter);
		lockdep_on();
	}
	kio->ki_filp = saved_filp;

	return ret;
}
/*
 * ->read_iter(): forward to the lower file's ->read_iter() through
 * au_do_iter(). The file info lock is kept across the loopback check and
 * released before the actual I/O.
 */
static ssize_t aufs_read_iter(struct kiocb *kio, struct iov_iter *iov_iter)
{
	ssize_t ret;
	struct file *file = kio->ki_filp;
	struct inode *inode = file_inode(file);
	struct super_block *sb = inode->i_sb;
	struct file *h_file;

	si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);

	h_file = au_read_pre(file, /*keep_fi*/1, /*lsc*/0);
	if (IS_ERR(h_file)) {
		ret = PTR_ERR(h_file);
		goto out_si;
	}

	if (au_test_loopback_kthread()) {
		au_warn_loopback(h_file->f_path.dentry->d_sb);
		if (file->f_mapping != h_file->f_mapping) {
			file->f_mapping = h_file->f_mapping;
			smp_mb(); /* unnecessary? */
		}
	}
	fi_read_unlock(file);

	ret = au_do_iter(h_file, MAY_READ, kio, iov_iter);
	/* todo: necessary? */
	/* file->f_ra = h_file->f_ra; */
	au_read_post(inode, h_file);

out_si:
	si_read_unlock(sb);
	return ret;
}
/*
 * ->write_iter(): forward to the lower file's ->write_iter() through
 * au_do_iter() with the inode lock and si_rwsem (read) held.
 */
static ssize_t aufs_write_iter(struct kiocb *kio, struct iov_iter *iov_iter)
{
	ssize_t ret;
	struct au_write_pre wpre;
	struct file *file = kio->ki_filp;
	struct inode *inode = file_inode(file);
	struct file *h_file;

	au_mtx_and_read_lock(inode);

	wpre.lsc = 0;
	h_file = au_write_pre(file, /*do_ready*/1, &wpre);
	if (!IS_ERR(h_file)) {
		ret = au_do_iter(h_file, MAY_WRITE, kio, iov_iter);
		au_write_post(inode, h_file, &wpre, ret);
	} else {
		ret = PTR_ERR(h_file);
	}

	si_read_unlock(inode->i_sb);
	inode_unlock(inode);
	return ret;
}
/*
 * ->splice_read(): splice from the top lower file via vfsub_splice_to()
 * while holding si_rwsem for read.
 */
static ssize_t aufs_splice_read(struct file *file, loff_t *ppos,
				struct pipe_inode_info *pipe, size_t len,
				unsigned int flags)
{
	ssize_t ret;
	struct inode *inode = file_inode(file);
	struct super_block *sb = inode->i_sb;
	struct file *h_file;

	si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);

	h_file = au_read_pre(file, /*keep_fi*/0, /*lsc*/0);
	if (IS_ERR(h_file)) {
		ret = PTR_ERR(h_file);
	} else {
		ret = vfsub_splice_to(h_file, ppos, pipe, len, flags);
		/* todo: necessary? */
		/* file->f_ra = h_file->f_ra; */
		au_read_post(inode, h_file);
	}

	si_read_unlock(sb);
	return ret;
}
/*
 * ->splice_write(): splice into the top lower file via vfsub_splice_from()
 * with the inode lock and si_rwsem (read) held.
 */
static ssize_t
aufs_splice_write(struct pipe_inode_info *pipe, struct file *file, loff_t *ppos,
		  size_t len, unsigned int flags)
{
	ssize_t ret;
	struct au_write_pre wpre;
	struct inode *inode = file_inode(file);
	struct file *h_file;

	au_mtx_and_read_lock(inode);

	wpre.lsc = 0;
	h_file = au_write_pre(file, /*do_ready*/1, &wpre);
	if (!IS_ERR(h_file)) {
		ret = vfsub_splice_from(pipe, h_file, ppos, len, flags);
		au_write_post(inode, h_file, &wpre, ret);
	} else {
		ret = PTR_ERR(h_file);
	}

	si_read_unlock(inode->i_sb);
	inode_unlock(inode);
	return ret;
}
/*
 * ->fallocate(): call vfs_fallocate() on the top lower file with the inode
 * lock and si_rwsem (read) held. au_write_post() is always told that
 * something was written.
 */
static long aufs_fallocate(struct file *file, int mode, loff_t offset,
			   loff_t len)
{
	long ret;
	struct au_write_pre wpre;
	struct inode *inode = file_inode(file);
	struct file *h_file;

	au_mtx_and_read_lock(inode);

	wpre.lsc = 0;
	h_file = au_write_pre(file, /*do_ready*/1, &wpre);
	if (!IS_ERR(h_file)) {
		lockdep_off();
		ret = vfs_fallocate(h_file, mode, offset, len);
		lockdep_on();
		au_write_post(inode, h_file, &wpre, /*written*/1);
	} else {
		ret = PTR_ERR(h_file);
	}

	si_read_unlock(inode->i_sb);
	inode_unlock(inode);
	return ret;
}
/*
 * ->copy_file_range(): copy between the top lower files of @src and @dst.
 *
 * Both files must be regular files (-EINVAL otherwise) and their lower files
 * must live on the same lower superblock (-EXDEV otherwise). The two files
 * are prepared in the order of their dentry addresses; @src is prepared with
 * keep_fi set, so its file info read lock stays held until it is released
 * here.
 *
 * Returns the vfsub_copy_file_range() result or a negative error.
 */
static ssize_t aufs_copy_file_range(struct file *src, loff_t src_pos,
				    struct file *dst, loff_t dst_pos,
				    size_t len, unsigned int flags)
{
	ssize_t err;
	struct au_write_pre wpre;
	enum { SRC, DST };
	struct {
		struct inode *inode;
		struct file *h_file;
		struct super_block *h_sb;
	} a[2];
#define a_src	a[SRC]
#define a_dst	a[DST]

	err = -EINVAL;
	a_src.inode = file_inode(src);
	if (unlikely(!S_ISREG(a_src.inode->i_mode)))
		goto out;
	a_dst.inode = file_inode(dst);
	if (unlikely(!S_ISREG(a_dst.inode->i_mode)))
		goto out;

	au_mtx_and_read_lock(a_dst.inode);
	/*
	 * in order to match the order in di_write_lock2_{child,parent}(),
	 * use f_path.dentry for this comparison.
	 */
	if (src->f_path.dentry < dst->f_path.dentry) {
		a_src.h_file = au_read_pre(src, /*keep_fi*/1, AuLsc_FI_1);
		err = PTR_ERR(a_src.h_file);
		if (IS_ERR(a_src.h_file))
			goto out_si;

		wpre.lsc = AuLsc_FI_2;
		a_dst.h_file = au_write_pre(dst, /*do_ready*/1, &wpre);
		err = PTR_ERR(a_dst.h_file);
		if (IS_ERR(a_dst.h_file)) {
			/*
			 * au_read_pre(keep_fi) left the file info of src
			 * read-locked; release it here since the common
			 * exit path at out_file is skipped.
			 */
			fi_read_unlock(src);
			au_read_post(a_src.inode, a_src.h_file);
			goto out_si;
		}
	} else {
		wpre.lsc = AuLsc_FI_1;
		a_dst.h_file = au_write_pre(dst, /*do_ready*/1, &wpre);
		err = PTR_ERR(a_dst.h_file);
		if (IS_ERR(a_dst.h_file))
			goto out_si;

		a_src.h_file = au_read_pre(src, /*keep_fi*/1, AuLsc_FI_2);
		err = PTR_ERR(a_src.h_file);
		if (IS_ERR(a_src.h_file)) {
			au_write_post(a_dst.inode, a_dst.h_file, &wpre,
				      /*written*/0);
			goto out_si;
		}
	}

	err = -EXDEV;
	a_src.h_sb = file_inode(a_src.h_file)->i_sb;
	a_dst.h_sb = file_inode(a_dst.h_file)->i_sb;
	if (unlikely(a_src.h_sb != a_dst.h_sb)) {
		AuDbgFile(src);
		AuDbgFile(dst);
		goto out_file;
	}

	err = vfsub_copy_file_range(a_src.h_file, src_pos, a_dst.h_file,
				    dst_pos, len, flags);

out_file:
	au_write_post(a_dst.inode, a_dst.h_file, &wpre, err);
	fi_read_unlock(src);
	au_read_post(a_src.inode, a_src.h_file);
out_si:
	si_read_unlock(a_dst.inode->i_sb);
	inode_unlock(a_dst.inode);
out:
	return err;
#undef a_src
#undef a_dst
}
/* ---------------------------------------------------------------------- */
/*
* The locking order around current->mmap_sem.
* - in most and regular cases
* file I/O syscall -- aufs_read() or something
* -- si_rwsem for read -- mmap_sem
* (Note that [fdi]i_rwsem are released before mmap_sem).
* - in mmap case
* mmap(2) -- mmap_sem -- aufs_mmap() -- si_rwsem for read -- [fdi]i_rwsem
* This AB-BA order is definitely bad, but is not a problem since "si_rwsem for
* read" allows multiple processes to acquire it and [fdi]i_rwsem are not held
* in file I/O. Aufs needs to stop lockdep in aufs_mmap() though.
* It means that when aufs acquires si_rwsem for write, the process should never
* acquire mmap_sem.
*
* Actually aufs_iterate() holds [fdi]i_rwsem before mmap_sem, but this is not a
* problem either since any directory is not able to be mmap-ed.
* The similar scenario is applied to aufs_readlink() too.
*/
#if 0 /* stop calling security_file_mmap() */
/*
 * Disabled helpers that convert vma->vm_flags into the prot/flags arguments
 * of security_mmap_file(); they are referenced only from the commented-out
 * call in aufs_mmap().
 */
/* cf. linux/include/linux/mman.h: calc_vm_prot_bits() */
#define AuConv_VM_PROT(f, b) _calc_vm_trans(f, VM_##b, PROT_##b)
/* arch-specific protection bits */
static unsigned long au_arch_prot_conv(unsigned long flags)
{
/* currently ppc64 only */
#ifdef CONFIG_PPC64
/* cf. linux/arch/powerpc/include/asm/mman.h */
AuDebugOn(arch_calc_vm_prot_bits(-1) != VM_SAO);
return AuConv_VM_PROT(flags, SAO);
#else
AuDebugOn(arch_calc_vm_prot_bits(-1));
return 0;
#endif
}
/* VM_{READ,WRITE,EXEC} plus the arch-specific bits, as PROT_* values */
static unsigned long au_prot_conv(unsigned long flags)
{
return AuConv_VM_PROT(flags, READ)
| AuConv_VM_PROT(flags, WRITE)
| AuConv_VM_PROT(flags, EXEC)
| au_arch_prot_conv(flags);
}
/* cf. linux/include/linux/mman.h: calc_vm_flag_bits() */
#define AuConv_VM_MAP(f, b) _calc_vm_trans(f, VM_##b, MAP_##b)
/* VM_{GROWSDOWN,DENYWRITE,LOCKED} as MAP_* values */
static unsigned long au_flag_conv(unsigned long flags)
{
return AuConv_VM_MAP(flags, GROWSDOWN)
| AuConv_VM_MAP(flags, DENYWRITE)
| AuConv_VM_MAP(flags, LOCKED);
}
#endif
/*
 * ->mmap(): map the top lower file instead of the aufs file.
 *
 * The vma is pointed at the lower file with au_vm_file_reset() before
 * call_mmap() is invoked on it; when call_mmap() fails the vma is pointed
 * back at the aufs file. The aufs lock operations are wrapped in
 * lockdep_off()/lockdep_on().
 *
 * Returns 0 on success, otherwise the error from au_write_pre() or
 * call_mmap().
 */
static int aufs_mmap(struct file *file, struct vm_area_struct *vma)
{
int err;
/* passed to au_write_pre() as do_ready: only for writable shared mappings */
const unsigned char wlock
= (file->f_mode & FMODE_WRITE) && (vma->vm_flags & VM_SHARED);
struct super_block *sb;
struct file *h_file;
struct inode *inode;
AuDbgVmRegion(file, vma);
inode = file_inode(file);
sb = inode->i_sb;
lockdep_off();
si_read_lock(sb, AuLock_NOPLMW);
h_file = au_write_pre(file, wlock, /*wpre*/NULL);
lockdep_on();
err = PTR_ERR(h_file);
if (IS_ERR(h_file))
goto out;
err = 0;
au_set_mmapped(file);
au_vm_file_reset(vma, h_file);
/*
 * we cannot call security_mmap_file() here since it may acquire
 * mmap_sem or i_mutex.
 *
 * err = security_mmap_file(h_file, au_prot_conv(vma->vm_flags),
 * au_flag_conv(vma->vm_flags));
 */
if (!err)
err = call_mmap(h_file, vma);
if (!err) {
au_vm_prfile_set(vma, file);
fsstack_copy_attr_atime(inode, file_inode(h_file));
goto out_fput; /* success */
}
/* call_mmap() failed: undo the mmapped mark and the vma file switch */
au_unset_mmapped(file);
au_vm_file_reset(vma, file);
out_fput:
lockdep_off();
/* au_write_pre() returned with iinfo write-locked */
ii_write_unlock(inode);
lockdep_on();
fput(h_file);
out:
lockdep_off();
si_read_unlock(sb);
lockdep_on();
AuTraceErr(err);
return err;
}
/* ---------------------------------------------------------------------- */
/*
 * ->fsync() for non-directories: sync the top lower file via vfsub_fsync().
 * A file not opened for writing is reported as success without doing
 * anything. @start and @end are not used.
 */
static int aufs_fsync_nondir(struct file *file, loff_t start, loff_t end,
			     int datasync)
{
	int ret;
	struct au_write_pre wpre;
	struct inode *inode;
	struct file *h_file;

	if (unlikely(!(file->f_mode & FMODE_WRITE)))
		return 0; /* -EBADF; */ /* posix? */

	inode = file_inode(file);
	au_mtx_and_read_lock(inode);

	wpre.lsc = 0;
	h_file = au_write_pre(file, /*do_ready*/1, &wpre);
	if (!IS_ERR(h_file)) {
		ret = vfsub_fsync(h_file, &h_file->f_path, datasync);
		au_write_post(inode, h_file, &wpre, /*written*/0);
	} else {
		ret = PTR_ERR(h_file);
	}

	si_read_unlock(inode->i_sb);
	inode_unlock(inode);
	return ret;
}
/*
 * ->fasync(): forward to the ->fasync() of the top lower file.
 *
 * Returns the error from au_read_pre(), the result of the lower ->fasync(),
 * or 0 when the lower file does not implement ->fasync().
 */
static int aufs_fasync(int fd, struct file *file, int flag)
{
	int err;
	struct file *h_file;
	struct super_block *sb;

	sb = file->f_path.dentry->d_sb;
	si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);

	h_file = au_read_pre(file, /*keep_fi*/0, /*lsc*/0);
	if (IS_ERR(h_file)) {
		err = PTR_ERR(h_file);
		goto out;
	}

	/*
	 * nothing to do when the branch has no ->fasync(); do not leak
	 * PTR_ERR() of a valid pointer as the return value.
	 */
	err = 0;
	if (h_file->f_op->fasync)
		err = h_file->f_op->fasync(fd, h_file, flag);
	fput(h_file); /* instead of au_read_post() */

out:
	si_read_unlock(sb);
	return err;
}
/*
 * ->setfl(): apply the file status flags @arg to the top lower file via
 * setfl(). The FASYNC bit currently set on the aufs file is OR-ed into @arg
 * first.
 */
static int aufs_setfl(struct file *file, unsigned long arg)
{
	int ret;
	struct super_block *sb = file->f_path.dentry->d_sb;
	struct file *h_file;

	si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);

	h_file = au_read_pre(file, /*keep_fi*/0, /*lsc*/0);
	if (IS_ERR(h_file)) {
		ret = PTR_ERR(h_file);
	} else {
		/* stop calling h_file->fasync */
		arg |= vfsub_file_flags(file) & FASYNC;
		ret = setfl(/*unused fd*/-1, h_file, arg);
		fput(h_file); /* instead of au_read_post() */
	}

	si_read_unlock(sb);
	return ret;
}
/* ---------------------------------------------------------------------- */
/* no one supports this operation, currently */
#if 0 /* reserved for future use */
/* empty placeholder, compiled out; .sendpage is commented out in aufs_file_fop */
static ssize_t aufs_sendpage(struct file *file, struct page *page, int offset,
size_t len, loff_t *pos, int more)
{
}
#endif
/* ---------------------------------------------------------------------- */
/* file operations for aufs non-directory files */
const struct file_operations aufs_file_fop = {
.owner = THIS_MODULE,
.llseek = default_llseek,
.read = aufs_read,
.write = aufs_write,
.read_iter = aufs_read_iter,
.write_iter = aufs_write_iter,
#ifdef CONFIG_AUFS_POLL
.poll = aufs_poll,
#endif
.unlocked_ioctl = aufs_ioctl_nondir,
#ifdef CONFIG_COMPAT
.compat_ioctl = aufs_compat_ioctl_nondir,
#endif
.mmap = aufs_mmap,
.open = aufs_open_nondir,
.flush = aufs_flush_nondir,
.release = aufs_release_nondir,
.fsync = aufs_fsync_nondir,
.fasync = aufs_fasync,
/* aufs_sendpage() is an empty stub under #if 0 */
/* .sendpage = aufs_sendpage, */
.setfl = aufs_setfl,
.splice_write = aufs_splice_write,
.splice_read = aufs_splice_read,
#if 0 /* reserved for future use */
.aio_splice_write = aufs_aio_splice_write,
.aio_splice_read = aufs_aio_splice_read,
#endif
.fallocate = aufs_fallocate,
.copy_file_range = aufs_copy_file_range
};

427
fs/aufs/fhsm.c Normal file
View File

@ -0,0 +1,427 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2011-2020 Junjiro R. Okajima
*
* This program, aufs is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/*
* File-based Hierarchy Storage Management
*/
#include <linux/anon_inodes.h>
#include <linux/poll.h>
#include <linux/seq_file.h>
#include <linux/statfs.h>
#include "aufs.h"
static aufs_bindex_t au_fhsm_bottom(struct super_block *sb)
{
struct au_sbinfo *sbinfo;
struct au_fhsm *fhsm;
SiMustAnyLock(sb);
sbinfo = au_sbi(sb);
fhsm = &sbinfo->si_fhsm;
AuDebugOn(!fhsm);
return fhsm->fhsm_bottom;
}
/* Record @bindex as the FHSM bottom branch; si_rwsem must be write-locked. */
void au_fhsm_set_bottom(struct super_block *sb, aufs_bindex_t bindex)
{
	struct au_fhsm *fhsm;

	SiMustWriteLock(sb);

	fhsm = &au_sbi(sb)->si_fhsm;
	AuDebugOn(!fhsm);
	fhsm->fhsm_bottom = bindex;
}
/* ---------------------------------------------------------------------- */
/*
 * Test whether the cached statfs of @br needs refreshing: non-zero when it
 * is not marked readable, or when more than fhsm_expire jiffies have passed
 * since bf_jiffy. bf_lock must be held.
 */
static int au_fhsm_test_jiffy(struct au_sbinfo *sbinfo, struct au_branch *br)
{
	struct au_br_fhsm *bf = br->br_fhsm;
	unsigned long expire;

	MtxMustLock(&bf->bf_lock);

	if (!bf->bf_readable)
		return 1;

	expire = bf->bf_jiffy + sbinfo->si_fhsm.fhsm_expire;
	return time_after(jiffies, expire);
}
/* ---------------------------------------------------------------------- */
static void au_fhsm_notify(struct super_block *sb, int val)
{
struct au_sbinfo *sbinfo;
struct au_fhsm *fhsm;
SiMustAnyLock(sb);
sbinfo = au_sbi(sb);
fhsm = &sbinfo->si_fhsm;
if (au_fhsm_pid(fhsm)
&& atomic_read(&fhsm->fhsm_readable) != -1) {
atomic_set(&fhsm->fhsm_readable, val);
if (val)
wake_up(&fhsm->fhsm_wqh);
}
}
/*
 * Refresh the cached statfs (bf_stfs) of branch @bindex and mark it
 * readable.
 *
 * @rstfs: optional; receives a copy of the refreshed statfs.
 * @do_lock: take bf_lock here; otherwise the caller must already hold it.
 * @do_notify: call au_fhsm_notify() right after a successful refresh.
 *
 * Returns 0 or the error from au_br_stfs(), which is logged.
 * NOTE(review): au_fhsm_notify() is also called unconditionally before
 * returning, regardless of @do_notify and of the result; kept as is.
 */
static int au_fhsm_stfs(struct super_block *sb, aufs_bindex_t bindex,
			struct aufs_stfs *rstfs, int do_lock, int do_notify)
{
	int err;
	struct au_branch *br;
	struct au_br_fhsm *bf;

	br = au_sbr(sb, bindex);
	AuDebugOn(au_br_rdonly(br));
	bf = br->br_fhsm;
	AuDebugOn(!bf);

	if (do_lock)
		mutex_lock(&bf->bf_lock);
	else
		MtxMustLock(&bf->bf_lock);

	/* sb->s_root for NFS is unreliable */
	err = au_br_stfs(br, &bf->bf_stfs);
	if (unlikely(err)) {
		/* the error code goes in the parentheses, the index after 'b' */
		AuErr1("FHSM failed (%d), b%d, ignored.\n", err, bindex);
		goto out;
	}

	bf->bf_jiffy = jiffies;
	bf->bf_readable = 1;
	if (do_notify)
		au_fhsm_notify(sb, /*val*/1);
	if (rstfs)
		*rstfs = bf->bf_stfs;

out:
	if (do_lock)
		mutex_unlock(&bf->bf_lock);
	au_fhsm_notify(sb, /*val*/1);

	return err;
}
/*
 * Tell FHSM that branch @bindex has been written to.
 *
 * Nothing is done when FHSM is not enabled on the superblock or when
 * @bindex is the FHSM bottom branch. Otherwise the branch's cached statfs is
 * refreshed when @force is set, an FHSM pid is registered, or the cache is
 * stale (au_fhsm_test_jiffy()). A refresh failure is logged by
 * au_fhsm_stfs() and otherwise ignored.
 */
void au_fhsm_wrote(struct super_block *sb, aufs_bindex_t bindex, int force)
{
	struct au_sbinfo *sbinfo;
	struct au_fhsm *fhsm;
	struct au_branch *br;
	struct au_br_fhsm *bf;

	AuDbg("b%d, force %d\n", bindex, force);
	SiMustAnyLock(sb);

	sbinfo = au_sbi(sb);
	fhsm = &sbinfo->si_fhsm;
	if (!au_ftest_si(sbinfo, FHSM)
	    || fhsm->fhsm_bottom == bindex)
		return;

	br = au_sbr(sb, bindex);
	bf = br->br_fhsm;
	AuDebugOn(!bf);
	mutex_lock(&bf->bf_lock);
	if (force
	    || au_fhsm_pid(fhsm)
	    || au_fhsm_test_jiffy(sbinfo, br))
		au_fhsm_stfs(sb, bindex, /*rstfs*/NULL, /*do_lock*/0,
			     /*do_notify*/1);
	mutex_unlock(&bf->bf_lock);
}
void au_fhsm_wrote_all(struct super_block *sb, int force)
{
aufs_bindex_t bindex, bbot;
struct au_branch *br;
/* exclude the bottom */
bbot = au_fhsm_bottom(sb);
for (bindex = 0; bindex < bbot; bindex++) {
br = au_sbr(sb, bindex);
if (au_br_fhsm(br->br_perm))
au_fhsm_wrote(sb, bindex, force);
}
}
/* ---------------------------------------------------------------------- */
/*
 * ->poll() of the FHSM fd: report EPOLLIN when fhsm_readable is non-zero.
 */
static __poll_t au_fhsm_poll(struct file *file, struct poll_table_struct *wait)
{
	__poll_t mask = 0;
	struct au_sbinfo *sbinfo = file->private_data;
	struct au_fhsm *fhsm = &sbinfo->si_fhsm;

	poll_wait(file, &fhsm->fhsm_wqh, wait);
	if (atomic_read(&fhsm->fhsm_readable))
		return EPOLLIN /* | EPOLLRDNORM */;

	AuDbg("mask 0x%x\n", mask);
	return mask;
}
/*
 * Copy one statfs record and its branch id to the user buffer @stbr.
 * Returns 0 on success or -EFAULT when either copy fails.
 */
static int au_fhsm_do_read_one(struct aufs_stbr __user *stbr,
			       struct aufs_stfs *stfs, __s16 brid)
{
	if (copy_to_user(&stbr->stfs, stfs, sizeof(*stfs))
	    || __put_user(brid, &stbr->brid))
		return -EFAULT;

	return 0;
}
/*
 * Copy the statfs record of every readable FHSM branch above the FHSM
 * bottom branch to the user buffer @stbr, clearing bf_readable for each
 * record copied.
 *
 * Returns the number of bytes stored, or -EFAULT when a copy fails or the
 * remaining @count is too small for the next record.
 */
static ssize_t au_fhsm_do_read(struct super_block *sb,
			       struct aufs_stbr __user *stbr, size_t count)
{
	ssize_t err;
	int nstbr;
	aufs_bindex_t bindex, bbot;
	struct au_branch *br;
	struct au_br_fhsm *bf;

	/* except the bottom branch */
	err = 0;
	nstbr = 0;
	bbot = au_fhsm_bottom(sb);
	for (bindex = 0; !err && bindex < bbot; bindex++) {
		br = au_sbr(sb, bindex);
		if (!au_br_fhsm(br->br_perm))
			continue;

		bf = br->br_fhsm;
		mutex_lock(&bf->bf_lock);
		if (bf->bf_readable) {
			if (count >= sizeof(*stbr)) {
				err = au_fhsm_do_read_one(stbr, &bf->bf_stfs,
							  br->br_id);
				stbr++;
			} else {
				err = -EFAULT;
			}
			if (!err) {
				bf->bf_readable = 0;
				count -= sizeof(*stbr);
				nstbr++;
			}
		}
		mutex_unlock(&bf->bf_lock);
	}

	if (err)
		return err;
	return sizeof(*stbr) * nstbr;
}
/*
 * ->read() of the FHSM fd.
 *
 * Waits until fhsm_readable becomes non-zero (or fails with -EAGAIN for
 * O_NONBLOCK), then, when it is positive, copies the pending statfs records
 * to @buf via au_fhsm_do_read(). When fhsm_readable turns out to be zero
 * again after si_rwsem has been taken, the wait is restarted.
 *
 * Returns the au_fhsm_do_read() result, -EMSGSIZE when @count cannot hold
 * one record per FHSM branch, the error from the interruptible wait, or 0
 * when fhsm_readable is negative. @pos is not used.
 */
static ssize_t au_fhsm_read(struct file *file, char __user *buf, size_t count,
loff_t *pos)
{
ssize_t err;
int readable;
aufs_bindex_t nfhsm, bindex, bbot;
struct au_sbinfo *sbinfo;
struct au_fhsm *fhsm;
struct au_branch *br;
struct super_block *sb;
err = 0;
sbinfo = file->private_data;
fhsm = &sbinfo->si_fhsm;
need_data:
spin_lock_irq(&fhsm->fhsm_wqh.lock);
if (!atomic_read(&fhsm->fhsm_readable)) {
if (vfsub_file_flags(file) & O_NONBLOCK)
err = -EAGAIN;
else
err = wait_event_interruptible_locked_irq
(fhsm->fhsm_wqh,
atomic_read(&fhsm->fhsm_readable));
}
spin_unlock_irq(&fhsm->fhsm_wqh.lock);
if (unlikely(err))
goto out;
/* sb may already be dead */
au_rw_read_lock(&sbinfo->si_rwsem);
readable = atomic_read(&fhsm->fhsm_readable);
if (readable > 0) {
sb = sbinfo->si_sb;
AuDebugOn(!sb);
/* exclude the bottom branch */
/* count the FHSM branches to check the size of the user buffer */
nfhsm = 0;
bbot = au_fhsm_bottom(sb);
for (bindex = 0; bindex < bbot; bindex++) {
br = au_sbr(sb, bindex);
if (au_br_fhsm(br->br_perm))
nfhsm++;
}
err = -EMSGSIZE;
if (nfhsm * sizeof(struct aufs_stbr) <= count) {
atomic_set(&fhsm->fhsm_readable, 0);
err = au_fhsm_do_read(sbinfo->si_sb, (void __user *)buf,
count);
}
}
au_rw_read_unlock(&sbinfo->si_rwsem);
/* the readable state was consumed in the meantime; wait again */
if (!readable)
goto need_data;
out:
return err;
}
/*
 * ->release() of the FHSM fd: clear the registered FHSM pid and drop the
 * reference on si_kobj. Always returns 0.
 */
static int au_fhsm_release(struct inode *inode, struct file *file)
{
	/* sb may already be dead */
	struct au_sbinfo *sbinfo = file->private_data;
	struct au_fhsm *fhsm = &sbinfo->si_fhsm;

	spin_lock(&fhsm->fhsm_spin);
	fhsm->fhsm_pid = 0;
	spin_unlock(&fhsm->fhsm_spin);

	kobject_put(&sbinfo->si_kobj);
	return 0;
}
/* file operations of the anonymous "[aufs_fhsm]" fd created by au_fhsm_fd() */
static const struct file_operations au_fhsm_fops = {
.owner = THIS_MODULE,
.llseek = noop_llseek,
.read = au_fhsm_read,
.poll = au_fhsm_poll,
.release = au_fhsm_release
};
/*
 * Create the FHSM file descriptor for @sb.
 *
 * Requires CAP_SYS_ADMIN (-EPERM otherwise); @oflags may only contain
 * O_CLOEXEC and O_NONBLOCK (-EINVAL otherwise). Only one FHSM pid can be
 * registered at a time (-EBUSY otherwise). On success a reference on
 * si_kobj is taken; it is dropped again in au_fhsm_release().
 *
 * Returns the new fd or a negative error.
 */
int au_fhsm_fd(struct super_block *sb, int oflags)
{
int err, fd;
struct au_sbinfo *sbinfo;
struct au_fhsm *fhsm;
err = -EPERM;
if (unlikely(!capable(CAP_SYS_ADMIN)))
goto out;
err = -EINVAL;
if (unlikely(oflags & ~(O_CLOEXEC | O_NONBLOCK)))
goto out;
err = 0;
sbinfo = au_sbi(sb);
fhsm = &sbinfo->si_fhsm;
/* claim the single FHSM slot for the current process */
spin_lock(&fhsm->fhsm_spin);
if (!fhsm->fhsm_pid)
fhsm->fhsm_pid = current->pid;
else
err = -EBUSY;
spin_unlock(&fhsm->fhsm_spin);
if (unlikely(err))
goto out;
oflags |= O_RDONLY;
/* oflags |= FMODE_NONOTIFY; */
fd = anon_inode_getfd("[aufs_fhsm]", &au_fhsm_fops, sbinfo, oflags);
err = fd;
if (unlikely(fd < 0))
goto out_pid;
/* succeed regardless 'fhsm' status */
kobject_get(&sbinfo->si_kobj);
si_noflush_read_lock(sb);
if (au_ftest_si(sbinfo, FHSM))
au_fhsm_wrote_all(sb, /*force*/0);
si_read_unlock(sb);
goto out; /* success */
out_pid:
/* creating the fd failed: give the FHSM slot back */
spin_lock(&fhsm->fhsm_spin);
fhsm->fhsm_pid = 0;
spin_unlock(&fhsm->fhsm_spin);
out:
AuTraceErr(err);
return err;
}
/* ---------------------------------------------------------------------- */
/*
 * Allocate and initialize the FHSM data of @br.
 * Returns 0 on success or -ENOMEM.
 */
int au_fhsm_br_alloc(struct au_branch *br)
{
	br->br_fhsm = kmalloc(sizeof(*br->br_fhsm), GFP_NOFS);
	if (!br->br_fhsm)
		return -ENOMEM;

	au_br_fhsm_init(br->br_fhsm);
	return 0;
}
/* ---------------------------------------------------------------------- */
/* Finish FHSM for @sb by notifying the value -1 through au_fhsm_notify(). */
void au_fhsm_fin(struct super_block *sb)
{
	const int fin_val = -1;

	au_fhsm_notify(sb, fin_val);
}
/*
 * Initialize the FHSM part of @sbinfo: lock, wait queue, readable state,
 * the default cache expiry, and no bottom branch (-1).
 */
void au_fhsm_init(struct au_sbinfo *sbinfo)
{
	struct au_fhsm *fhsm = &sbinfo->si_fhsm;
	const unsigned int def_msec = AUFS_FHSM_CACHE_DEF_SEC * MSEC_PER_SEC;

	spin_lock_init(&fhsm->fhsm_spin);
	init_waitqueue_head(&fhsm->fhsm_wqh);
	atomic_set(&fhsm->fhsm_readable, 0);
	fhsm->fhsm_expire = msecs_to_jiffies(def_msec);
	fhsm->fhsm_bottom = -1;
}
void au_fhsm_set(struct au_sbinfo *sbinfo, unsigned int sec)
{
sbinfo->si_fhsm.fhsm_expire
= msecs_to_jiffies(sec * MSEC_PER_SEC);
}
/*
 * Print ",fhsm_sec=N" to @seq when FHSM is enabled and the cache expiry
 * differs from the default.
 */
void au_fhsm_show(struct seq_file *seq, struct au_sbinfo *sbinfo)
{
	unsigned int sec;

	if (au_ftest_si(sbinfo, FHSM)) {
		sec = jiffies_to_msecs(sbinfo->si_fhsm.fhsm_expire)
			/ MSEC_PER_SEC;
		if (sec != AUFS_FHSM_CACHE_DEF_SEC)
			seq_printf(seq, ",fhsm_sec=%u", sec);
	}
}

863
fs/aufs/file.c Normal file
View File

@ -0,0 +1,863 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2005-2020 Junjiro R. Okajima
*
* This program, aufs is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* handling file/dir, and address_space operation
*/
#ifdef CONFIG_AUFS_DEBUG
#include <linux/migrate.h>
#endif
#include <linux/pagemap.h>
#include "aufs.h"
/*
 * Convert open flags into their read-only form: every flag which implies
 * writing (O_WRONLY, O_RDWR, O_APPEND, O_CREAT, O_TRUNC) is dropped, and
 * O_RDONLY | O_NOATIME is added. All other bits are passed through.
 */
unsigned int au_file_roflags(unsigned int flags)
{
	const unsigned int wr_mask
		= O_WRONLY | O_RDWR | O_APPEND | O_CREAT | O_TRUNC;

	return (flags & ~wr_mask) | O_RDONLY | O_NOATIME;
}
/* common functions to regular file and dir */
/*
 * Open the lower (branch) file which corresponds to @dentry on the branch
 * @bindex.
 *
 * @flags: requested open flags. O_CREAT is always dropped. When
 *	au_test_ro() reports the branch as read-only for this inode, the
 *	flags are converted by au_file_roflags().
 * @file: not referenced in this function.
 * @force_wr: on such a read-only branch, re-enable O_WRONLY if the caller
 *	asked for it. This fails with -EROFS when the lower inode is
 *	natively read-only or append-only.
 *
 * Returns the opened lower file or an ERR_PTR(). The br_nfiles counter of
 * the branch is incremented before opening and decremented again on
 * failure.
 */
struct file *au_h_open(struct dentry *dentry, aufs_bindex_t bindex, int flags,
struct file *file, int force_wr)
{
struct file *h_file;
struct dentry *h_dentry;
struct inode *h_inode;
struct super_block *sb;
struct au_branch *br;
struct path h_path;
int err;
/* a race condition can happen between open and unlink/rmdir */
h_file = ERR_PTR(-ENOENT);
h_dentry = au_h_dptr(dentry, bindex);
/*
 * NOTE(review): a NULL/negative lower dentry is tested only under nfsd,
 * while h_dentry is dereferenced unconditionally below. Presumably the
 * other callers guarantee a positive lower dentry -- confirm.
 */
if (au_test_nfsd() && (!h_dentry || d_is_negative(h_dentry)))
goto out;
h_inode = d_inode(h_dentry);
spin_lock(&h_dentry->d_lock);
/* 'err' is a boolean here; when set, -ENOENT prepared above is returned */
err = (!d_unhashed(dentry) && d_unlinked(h_dentry))
/* || !d_inode(dentry)->i_nlink */
;
spin_unlock(&h_dentry->d_lock);
if (unlikely(err))
goto out;
sb = dentry->d_sb;
br = au_sbr(sb, bindex);
err = au_br_test_oflag(flags, br);
h_file = ERR_PTR(err);
if (unlikely(err))
goto out;
/* drop flags for writing */
if (au_test_ro(sb, bindex, d_inode(dentry))) {
/* force_wr is honoured only when the caller requested O_WRONLY */
if (force_wr && !(flags & O_WRONLY))
force_wr = 0;
flags = au_file_roflags(flags);
if (force_wr) {
h_file = ERR_PTR(-EROFS);
/*
 * NOTE(review): the flags have just been converted above, so
 * this second call looks redundant.
 */
flags = au_file_roflags(flags);
if (unlikely(vfsub_native_ro(h_inode)
|| IS_APPEND(h_inode)))
goto out;
flags &= ~O_ACCMODE;
flags |= O_WRONLY;
}
}
flags &= ~O_CREAT;
/* account the lower file first, undone at out_br on failure */
au_lcnt_inc(&br->br_nfiles);
h_path.dentry = h_dentry;
h_path.mnt = au_br_mnt(br);
h_file = vfsub_dentry_open(&h_path, flags);
if (IS_ERR(h_file))
goto out_br;
/* for an exec-open, forbid writers on the lower file as well */
if (flags & __FMODE_EXEC) {
err = deny_write_access(h_file);
if (unlikely(err)) {
fput(h_file);
h_file = ERR_PTR(err);
goto out_br;
}
}
fsnotify_open(h_file);
goto out; /* success */
out_br:
au_lcnt_dec(&br->br_nfiles);
out:
return h_file;
}
/*
 * Copy-up (and optionally move-up) @dentry at open time, according to the
 * attribute of its top branch returned by au_br_cmoo().
 *
 * Nothing is done and 0 is returned when
 * - @dentry is the root,
 * - its top branch index is 0,
 * - the current process, or its real parent, is the registered FHSM
 *   process,
 * - the branch has no such attribute (for a non-regular file only
 *   AuBrAttr_COO_ALL is honoured), or
 * - au_wbr_do_copyup_bu() finds no target branch above the source.
 *
 * Otherwise the entry is copied-up to the chosen branch, and when
 * AuBrWAttr_MOO is set the source on the lower branch is unlinked
 * afterwards. A failure of that unlink is only logged.
 *
 * The caller must hold the write locks asserted by DiMustWriteLock() and
 * IiMustWriteLock().
 * Returns 0 or a negative errno.
 */
static int au_cmoo(struct dentry *dentry)
{
int err, cmoo, matched;
unsigned int udba;
struct path h_path;
struct au_pin pin;
struct au_cp_generic cpg = {
.dentry = dentry,
.bdst = -1,
.bsrc = -1,
.len = -1,
.pin = &pin,
.flags = AuCpup_DTIME | AuCpup_HOPEN
};
struct inode *delegated;
struct super_block *sb;
struct au_sbinfo *sbinfo;
struct au_fhsm *fhsm;
pid_t pid;
struct au_branch *br;
struct dentry *parent;
struct au_hinode *hdir;
DiMustWriteLock(dentry);
IiMustWriteLock(d_inode(dentry));
err = 0;
if (IS_ROOT(dentry))
goto out;
cpg.bsrc = au_dbtop(dentry);
/* already on the branch 0, there is no branch above it */
if (!cpg.bsrc)
goto out;
sb = dentry->d_sb;
sbinfo = au_sbi(sb);
fhsm = &sbinfo->si_fhsm;
pid = au_fhsm_pid(fhsm);
/* real_parent is RCU-protected */
rcu_read_lock();
matched = (pid
&& (current->pid == pid
|| rcu_dereference(current->real_parent)->pid == pid));
rcu_read_unlock();
if (matched)
goto out;
br = au_sbr(sb, cpg.bsrc);
cmoo = au_br_cmoo(br->br_perm);
if (!cmoo)
goto out;
if (!d_is_reg(dentry))
cmoo &= AuBrAttr_COO_ALL;
if (!cmoo)
goto out;
parent = dget_parent(dentry);
di_write_lock_parent(parent);
/* search the copy-up target among the branches above bsrc */
err = au_wbr_do_copyup_bu(dentry, cpg.bsrc - 1);
cpg.bdst = err;
if (unlikely(err < 0)) {
err = 0; /* there is no upper writable branch */
goto out_dgrade;
}
AuDbg("bsrc %d, bdst %d\n", cpg.bsrc, cpg.bdst);
/* do not respect the coo attrib for the target branch */
err = au_cpup_dirs(dentry, cpg.bdst);
if (unlikely(err))
goto out_dgrade;
/* from here the parent is held as a read lock */
di_downgrade_lock(parent, AuLock_IR);
udba = au_opt_udba(sb);
err = au_pin(&pin, dentry, cpg.bdst, udba,
AuPin_DI_LOCKED | AuPin_MNT_WRITE);
if (unlikely(err))
goto out_parent;
err = au_sio_cpup_simple(&cpg);
au_unpin(&pin);
if (unlikely(err))
goto out_parent;
if (!(cmoo & AuBrWAttr_MOO))
goto out_parent; /* success */
/* move-up: remove the source on the lower branch */
err = au_pin(&pin, dentry, cpg.bsrc, udba,
AuPin_DI_LOCKED | AuPin_MNT_WRITE);
if (unlikely(err))
goto out_parent;
h_path.mnt = au_br_mnt(br);
h_path.dentry = au_h_dptr(dentry, cpg.bsrc);
hdir = au_hi(d_inode(parent), cpg.bsrc);
delegated = NULL;
err = vfsub_unlink(hdir->hi_inode, &h_path, &delegated, /*force*/1);
au_unpin(&pin);
/* todo: keep h_dentry or not? */
if (unlikely(err == -EWOULDBLOCK)) {
pr_warn("cannot retry for NFSv4 delegation"
" for an internal unlink\n");
iput(delegated);
}
/* the copy-up itself succeeded, so a failed unlink is not fatal */
if (unlikely(err)) {
pr_err("unlink %pd after coo failed (%d), ignored\n",
dentry, err);
err = 0;
}
goto out_parent; /* success */
out_dgrade:
di_downgrade_lock(parent, AuLock_IR);
out_parent:
di_read_unlock(parent, AuLock_IR);
dput(parent);
out:
AuTraceErr(err);
return err;
}
/*
 * Common part of opening an aufs file.
 *
 * Allocates the private data of @file by au_finfo_init(), handles the
 * copy-up/move-up on open by au_cmoo(), and then calls args->open().
 * When args->aopen is set, args->h_file is passed to args->open() and the
 * calls are wrapped by lockdep_off()/lockdep_on(); otherwise NULL is
 * passed as the lower file.
 *
 * On success, @file is added to the si_files list of the superblock.
 * On failure, the private data is released by au_finfo_fin().
 * Returns 0 or a negative errno.
 */
int au_do_open(struct file *file, struct au_do_open_args *args)
{
int err, aopen = args->aopen;
struct dentry *dentry;
struct au_finfo *finfo;
if (!aopen)
err = au_finfo_init(file, args->fidir);
else {
lockdep_off();
err = au_finfo_init(file, args->fidir);
lockdep_on();
}
if (unlikely(err))
goto out;
dentry = file->f_path.dentry;
AuDebugOn(IS_ERR_OR_NULL(dentry));
/* au_cmoo() needs the write lock, the open callback runs under read lock */
di_write_lock_child(dentry);
err = au_cmoo(dentry);
di_downgrade_lock(dentry, AuLock_IR);
if (!err) {
if (!aopen)
err = args->open(file, vfsub_file_flags(file), NULL);
else {
lockdep_off();
err = args->open(file, vfsub_file_flags(file),
args->h_file);
lockdep_on();
}
}
di_read_unlock(dentry, AuLock_IR);
finfo = au_fi(file);
if (!err) {
finfo->fi_file = file;
au_hbl_add(&finfo->fi_hlist,
&au_sbi(file->f_path.dentry->d_sb)->si_files);
}
/* fi_rwsem has been write-locked since au_finfo_init() */
if (!aopen)
fi_write_unlock(file);
else {
lockdep_off();
fi_write_unlock(file);
lockdep_on();
}
if (unlikely(err)) {
/*
 * au_finfo_fin() asserts that fi_hdir is NULL.
 * NOTE(review): presumably args->fidir stays owned by the caller on
 * failure -- confirm.
 */
finfo->fi_hdir = NULL;
au_finfo_fin(file);
}
out:
AuTraceErr(err);
return err;
}
/*
 * Re-open the lower file of the non-directory @file on the current top
 * branch of its dentry.
 *
 * When the file is already opened on that branch with the same f_mode,
 * nothing is done. Otherwise the old lower file is kept aside (with an
 * extra reference and an extra br_nfiles count) so that it can be put
 * back if the new open fails.
 * Returns 0 or a negative errno.
 */
int au_reopen_nondir(struct file *file)
{
int err;
aufs_bindex_t btop;
struct dentry *dentry;
struct au_branch *br;
struct file *h_file, *h_file_tmp;
dentry = file->f_path.dentry;
btop = au_dbtop(dentry);
br = au_sbr(dentry->d_sb, btop);
h_file_tmp = NULL;
if (au_fbtop(file) == btop) {
h_file = au_hf_top(file);
if (file->f_mode == h_file->f_mode)
return 0; /* success */
/* keep the old lower file alive while its slot is cleared */
h_file_tmp = h_file;
get_file(h_file_tmp);
au_lcnt_inc(&br->br_nfiles);
au_set_h_fptr(file, btop, NULL);
}
AuDebugOn(au_fi(file)->fi_hdir);
/*
* it can happen
* file exists on both of rw and ro
* open --> dbtop and fbtop are both 0
* prepend a branch as rw, "rw" become ro
* remove rw/file
* delete the top branch, "rw" becomes rw again
* --> dbtop is 1, fbtop is still 0
* write --> fbtop is 0 but dbtop is 1
*/
/* AuDebugOn(au_fbtop(file) < btop); */
h_file = au_h_open(dentry, btop, vfsub_file_flags(file) & ~O_TRUNC,
file, /*force_wr*/0);
err = PTR_ERR(h_file);
if (IS_ERR(h_file)) {
if (h_file_tmp) {
/* revert */
/* the reference and the count taken above go back to the slot */
au_set_h_fptr(file, btop, h_file_tmp);
h_file_tmp = NULL;
}
goto out; /* todo: close all? */
}
err = 0;
au_set_fbtop(file, btop);
au_set_h_fptr(file, btop, h_file);
au_update_figen(file);
/* todo: necessary? */
/* file->f_ra = h_file->f_ra; */
out:
/* drop the temporary reference unless it was handed back above */
if (h_file_tmp) {
fput(h_file_tmp);
au_lcnt_dec(&br->br_nfiles);
}
return err;
}
/* ---------------------------------------------------------------------- */
/*
 * Re-open @file on the branch @btgt using the dentry @hi_wh.
 *
 * The top index of the dentry info and the lower dentry at @btgt are
 * replaced temporarily so that au_reopen_nondir() opens @hi_wh, and both
 * are restored before returning. The dentry info must be write-locked.
 * Returns the result of au_reopen_nondir().
 */
static int au_reopen_wh(struct file *file, aufs_bindex_t btgt,
struct dentry *hi_wh)
{
int err;
aufs_bindex_t btop;
struct au_dinfo *dinfo;
struct dentry *h_dentry;
struct au_hdentry *hdp;
dinfo = au_di(file->f_path.dentry);
AuRwMustWriteLock(&dinfo->di_rwsem);
/* save, and override the top index and the lower dentry */
btop = dinfo->di_btop;
dinfo->di_btop = btgt;
hdp = au_hdentry(dinfo, btgt);
h_dentry = hdp->hd_dentry;
hdp->hd_dentry = hi_wh;
err = au_reopen_nondir(file);
/* restore them regardless of the result */
hdp->hd_dentry = h_dentry;
dinfo->di_btop = btop;
return err;
}
/*
 * Prepare @file for writing on the branch @bcpup in the "wh" case of
 * au_ready_to_write().
 *
 * If neither au_hi_wh() returns a dentry for the inode on @bcpup nor a
 * positive lower dentry exists there, the file is copied-up by
 * au_sio_cpup_wh(). Otherwise it is re-opened by au_reopen_wh().
 * On success, when the inode has several links (or I_LINKABLE is set) and
 * the PLINK option is enabled, the inode is appended to the pseudo-link
 * list.
 * Returns 0 or a negative errno.
 */
static int au_ready_to_write_wh(struct file *file, loff_t len,
aufs_bindex_t bcpup, struct au_pin *pin)
{
int err;
struct inode *inode, *h_inode;
struct dentry *h_dentry, *hi_wh;
struct au_cp_generic cpg = {
.dentry = file->f_path.dentry,
.bdst = bcpup,
.bsrc = -1,
.len = len,
.pin = pin
};
au_update_dbtop(cpg.dentry);
inode = d_inode(cpg.dentry);
h_inode = NULL;
/* look for a positive lower dentry on bcpup, if it is within the range */
if (au_dbtop(cpg.dentry) <= bcpup
&& au_dbbot(cpg.dentry) >= bcpup) {
h_dentry = au_h_dptr(cpg.dentry, bcpup);
if (h_dentry && d_is_positive(h_dentry))
h_inode = d_inode(h_dentry);
}
hi_wh = au_hi_wh(inode, bcpup);
if (!hi_wh && !h_inode)
err = au_sio_cpup_wh(&cpg, file);
else
/* already copied-up after unlink */
err = au_reopen_wh(file, bcpup, hi_wh);
if (!err
&& (inode->i_nlink > 1
|| (inode->i_state & I_LINKABLE))
&& au_opt_test(au_mntflags(cpg.dentry->d_sb), PLINK))
au_plink_append(inode, bcpup, au_h_dptr(cpg.dentry, bcpup));
return err;
}
/*
 * prepare the @file for writing.
 *
 * @len: the length handed to the copy-up (cpg.len).
 * @pin: filled by au_pin(). On success it is left pinned for the caller.
 *
 * When the branch of the lower file is writable and the lower file is
 * already opened for writing, only the pin is taken. Otherwise a writable
 * branch is chosen by AuWbrCopyup(), the parent dirs are copied-up when
 * necessary, and the file is copied-up and/or re-opened there.
 *
 * The parent is write-locked while the target branch is decided. Every
 * path downgrades it to a read lock before di_read_unlock(), including the
 * failure of au_h_open_pre().
 *
 * Returns 0 or a negative errno. On failure, @pin is not left pinned.
 */
int au_ready_to_write(struct file *file, loff_t len, struct au_pin *pin)
{
	int err;
	aufs_bindex_t dbtop;
	struct dentry *parent;
	struct inode *inode;
	struct super_block *sb;
	struct file *h_file;
	struct au_cp_generic cpg = {
		.dentry	= file->f_path.dentry,
		.bdst	= -1,
		.bsrc	= -1,
		.len	= len,
		.pin	= pin,
		.flags	= AuCpup_DTIME
	};

	sb = cpg.dentry->d_sb;
	inode = d_inode(cpg.dentry);
	cpg.bsrc = au_fbtop(file);
	err = au_test_ro(sb, cpg.bsrc, inode);
	if (!err && (au_hf_top(file)->f_mode & FMODE_WRITE)) {
		/* already writable, pinning is enough */
		err = au_pin(pin, cpg.dentry, cpg.bsrc, AuOpt_UDBA_NONE,
			     /*flags*/0);
		goto out;
	}

	/* need to cpup or reopen */
	parent = dget_parent(cpg.dentry);
	di_write_lock_parent(parent);
	err = AuWbrCopyup(au_sbi(sb), cpg.dentry);
	cpg.bdst = err;
	if (unlikely(err < 0))
		goto out_dgrade;
	err = 0;

	if (!d_unhashed(cpg.dentry) && !au_h_dptr(parent, cpg.bdst)) {
		err = au_cpup_dirs(cpg.dentry, cpg.bdst);
		if (unlikely(err))
			goto out_dgrade;
	}

	err = au_pin(pin, cpg.dentry, cpg.bdst, AuOpt_UDBA_NONE,
		     AuPin_DI_LOCKED | AuPin_MNT_WRITE);
	if (unlikely(err))
		goto out_dgrade;

	dbtop = au_dbtop(cpg.dentry);
	if (dbtop <= cpg.bdst)
		cpg.bsrc = cpg.bdst;

	if (dbtop <= cpg.bdst		/* just reopen */
	    || !d_unhashed(cpg.dentry)	/* copyup and reopen */
		) {
		h_file = au_h_open_pre(cpg.dentry, cpg.bsrc, /*force_wr*/0);
		if (IS_ERR(h_file)) {
			err = PTR_ERR(h_file);
			/*
			 * the parent is still write-locked here, but the
			 * error path below releases it as a read lock.
			 */
			di_downgrade_lock(parent, AuLock_IR);
		} else {
			di_downgrade_lock(parent, AuLock_IR);
			if (dbtop > cpg.bdst)
				err = au_sio_cpup_simple(&cpg);
			if (!err)
				err = au_reopen_nondir(file);
			au_h_open_post(cpg.dentry, cpg.bsrc, h_file);
		}
	} else {			/* copyup as wh and reopen */
		/*
		 * since writable hfsplus branch is not supported,
		 * h_open_pre/post() are unnecessary.
		 */
		err = au_ready_to_write_wh(file, len, cpg.bdst, pin);
		di_downgrade_lock(parent, AuLock_IR);
	}

	if (!err) {
		/* the pin is handed over to the caller */
		au_pin_set_parent_lflag(pin, /*lflag*/0);
		goto out_dput; /* success */
	}
	au_unpin(pin);
	goto out_unlock;

out_dgrade:
	di_downgrade_lock(parent, AuLock_IR);
out_unlock:
	di_read_unlock(parent, AuLock_IR);
out_dput:
	dput(parent);
out:
	return err;
}
/* ---------------------------------------------------------------------- */
/*
 * Common flush routine: call the @flush callback for @file with the
 * superblock, the file info and the inode info read-locked (taken in this
 * order and released in the reverse order), then refresh the cached
 * times and sizes of the inode by au_cpup_attr_timesizes().
 * Returns the value of @flush.
 */
int au_do_flush(struct file *file, fl_owner_t id,
int (*flush)(struct file *file, fl_owner_t id))
{
int err;
struct super_block *sb;
struct inode *inode;
inode = file_inode(file);
sb = inode->i_sb;
si_noflush_read_lock(sb);
fi_read_lock(file);
ii_read_lock_child(inode);
err = flush(file, id);
au_cpup_attr_timesizes(inode);
ii_read_unlock(inode);
fi_read_unlock(file);
si_read_unlock(sb);
return err;
}
/* ---------------------------------------------------------------------- */
/*
 * Refresh @file according to the current top branch of its inode.
 *
 * Nothing is done when the top branch of the inode equals the one of the
 * file, or when the dentry is the root. If the inode's top branch is
 * read-only, a writable target is chosen by AuWbrCopyup(). Then either
 * - a pseudo-linked, still hashed non-directory whose target is above the
 *   dentry's top branch is copied-up, or
 * - a file which already has a dentry from au_hi_wh() on the target is
 *   re-opened by au_reopen_wh(), and *@need_reopen is cleared.
 *
 * @need_reopen: set to 0 when the file has been re-opened here; otherwise
 *	left untouched.
 * The file info must be write-locked. Returns 0 or a negative errno.
 */
static int au_file_refresh_by_inode(struct file *file, int *need_reopen)
{
int err;
struct au_pin pin;
struct au_finfo *finfo;
struct dentry *parent, *hi_wh;
struct inode *inode;
struct super_block *sb;
struct au_cp_generic cpg = {
.dentry = file->f_path.dentry,
.bdst = -1,
.bsrc = -1,
.len = -1,
.pin = &pin,
.flags = AuCpup_DTIME
};
FiMustWriteLock(file);
err = 0;
finfo = au_fi(file);
sb = cpg.dentry->d_sb;
inode = d_inode(cpg.dentry);
cpg.bdst = au_ibtop(inode);
if (cpg.bdst == finfo->fi_btop || IS_ROOT(cpg.dentry))
goto out;
parent = dget_parent(cpg.dentry);
if (au_test_ro(sb, cpg.bdst, inode)) {
/* '!AuLock_IR' is passed as the flags here, i.e. without AuLock_IR */
di_read_lock_parent(parent, !AuLock_IR);
err = AuWbrCopyup(au_sbi(sb), cpg.dentry);
cpg.bdst = err;
di_read_unlock(parent, !AuLock_IR);
if (unlikely(err < 0))
goto out_parent;
err = 0;
}
di_read_lock_parent(parent, AuLock_IR);
hi_wh = au_hi_wh(inode, cpg.bdst);
if (!S_ISDIR(inode->i_mode)
&& au_opt_test(au_mntflags(sb), PLINK)
&& au_plink_test(inode)
&& !d_unhashed(cpg.dentry)
&& cpg.bdst < au_dbtop(cpg.dentry)) {
err = au_test_and_cpup_dirs(cpg.dentry, cpg.bdst);
if (unlikely(err))
goto out_unlock;
/* always superio. */
err = au_pin(&pin, cpg.dentry, cpg.bdst, AuOpt_UDBA_NONE,
AuPin_DI_LOCKED | AuPin_MNT_WRITE);
if (!err) {
err = au_sio_cpup_simple(&cpg);
au_unpin(&pin);
}
} else if (hi_wh) {
/* already copied-up after unlink */
err = au_reopen_wh(file, cpg.bdst, hi_wh);
*need_reopen = 0;
}
out_unlock:
di_read_unlock(parent, AuLock_IR);
out_parent:
dput(parent);
out:
return err;
}
/*
 * Re-arrange the array of the lower files of the directory @file after
 * the branches were changed.
 *
 * Each opened lower file is moved to the index where its branch (found by
 * the branch id) is now, or closed when the branch does not exist
 * anymore. Then fi_btop and fd_bbot are re-computed, closing the lower
 * files without an inode (and, in the mmapped/unlinked case, those above
 * the original top branch) on the way.
 * The file info must be write-locked and fi_hdir must be set.
 */
static void au_do_refresh_dir(struct file *file)
{
aufs_bindex_t bindex, bbot, new_bindex, brid;
struct au_hfile *p, tmp, *q;
struct au_finfo *finfo;
struct super_block *sb;
struct au_fidir *fidir;
FiMustWriteLock(file);
sb = file->f_path.dentry->d_sb;
finfo = au_fi(file);
fidir = finfo->fi_hdir;
AuDebugOn(!fidir);
p = fidir->fd_hfile + finfo->fi_btop;
/* remember the branch id of the current top, used in the else-part below */
brid = p->hf_br->br_id;
bbot = fidir->fd_bbot;
for (bindex = finfo->fi_btop; bindex <= bbot; bindex++, p++) {
if (!p->hf_file)
continue;
new_bindex = au_br_index(sb, p->hf_br->br_id);
if (new_bindex == bindex)
continue;
/* the branch is gone, close the lower file */
if (new_bindex < 0) {
au_set_h_fptr(file, bindex, NULL);
continue;
}
/* swap two lower inode, and loop again */
q = fidir->fd_hfile + new_bindex;
tmp = *q;
*q = *p;
*p = tmp;
/* the entry swapped into this slot is in use, so examine it too */
if (tmp.hf_file) {
bindex--;
p--;
}
}
p = fidir->fd_hfile;
if (!au_test_mmapped(file) && !d_unlinked(file->f_path.dentry)) {
/* the new top is the first lower file which has an inode */
bbot = au_sbbot(sb);
for (finfo->fi_btop = 0; finfo->fi_btop <= bbot;
finfo->fi_btop++, p++)
if (p->hf_file) {
if (file_inode(p->hf_file))
break;
au_hfput(p, /*execed*/0);
}
} else {
/* keep the original top branch, close everything above it */
bbot = au_br_index(sb, brid);
for (finfo->fi_btop = 0; finfo->fi_btop < bbot;
finfo->fi_btop++, p++)
if (p->hf_file)
au_hfput(p, /*execed*/0);
bbot = au_sbbot(sb);
}
/* search the new bottom from the last branch upward */
p = fidir->fd_hfile + bbot;
for (fidir->fd_bbot = bbot; fidir->fd_bbot >= finfo->fi_btop;
fidir->fd_bbot--, p--)
if (p->hf_file) {
if (file_inode(p->hf_file))
break;
au_hfput(p, /*execed*/0);
}
AuDebugOn(fidir->fd_bbot < finfo->fi_btop);
}
/*
* after branch manipulating, refresh the file.
*/
/*
 * Refresh the private data of @file and re-open it when necessary.
 *
 * For a non-directory, only the top branch index is updated from the
 * branch id of its lower file. For a directory, the array of the lower
 * files is grown to the current number of branches, re-arranged by
 * au_do_refresh_dir(), and possibly shrunk again.
 * Unless the file is mmapped, au_file_refresh_by_inode() is called, and
 * @reopen is called when a re-open is still needed and the dentry is not
 * unlinked.
 *
 * @reopen: the callback which re-opens the lower file(s).
 * On success the generation of the file is updated. On failure, all the
 * lower files of a directory are closed.
 * Returns 0 or a negative errno.
 */
static int refresh_file(struct file *file, int (*reopen)(struct file *file))
{
int err, need_reopen, nbr;
aufs_bindex_t bbot, bindex;
struct dentry *dentry;
struct super_block *sb;
struct au_finfo *finfo;
struct au_hfile *hfile;
dentry = file->f_path.dentry;
sb = dentry->d_sb;
nbr = au_sbbot(sb) + 1;
finfo = au_fi(file);
if (!finfo->fi_hdir) {
/* non-dir: follow the branch of the single lower file */
hfile = &finfo->fi_htop;
AuDebugOn(!hfile->hf_file);
bindex = au_br_index(sb, hfile->hf_br->br_id);
AuDebugOn(bindex < 0);
if (bindex != finfo->fi_btop)
au_set_fbtop(file, bindex);
} else {
err = au_fidir_realloc(finfo, nbr, /*may_shrink*/0);
if (unlikely(err))
goto out;
au_do_refresh_dir(file);
}
err = 0;
need_reopen = 1;
if (!au_test_mmapped(file))
err = au_file_refresh_by_inode(file, &need_reopen);
if (finfo->fi_hdir)
/* harmless if err */
au_fidir_realloc(finfo, nbr, /*may_shrink*/1);
if (!err && need_reopen && !d_unlinked(dentry))
err = reopen(file);
if (!err) {
au_update_figen(file);
goto out; /* success */
}
/* error, close all lower files */
if (finfo->fi_hdir) {
bbot = au_fbbot_dir(file);
for (bindex = au_fbtop(file); bindex <= bbot; bindex++)
au_set_h_fptr(file, bindex, NULL);
}
out:
return err;
}
/* common function to regular file and dir */
/*
 * Lock the file info and the dentry info of @file, and revalidate the
 * file when it is out of date.
 *
 * @reopen: the callback given to refresh_file().
 * @wlock: when zero, both locks are downgraded to read locks before a
 *	successful return; otherwise they stay write-locked.
 * @fi_lsc: the lock subclass for the file info. When it is non-zero,
 *	di_write_lock_child2() is used for the dentry instead of
 *	di_write_lock_child().
 *
 * The file is regarded as up to date when its generation equals the one
 * of the superblock, the top branch of the dentry equals the one of the
 * inode, and the top branch of the file equals the one of the dentry.
 * Otherwise the dentry path is revalidated if necessary and the file is
 * refreshed.
 *
 * Returns 0 with both locks held, or a negative errno with both locks
 * released.
 */
int au_reval_and_lock_fdi(struct file *file, int (*reopen)(struct file *file),
int wlock, unsigned int fi_lsc)
{
int err;
unsigned int sigen, figen;
aufs_bindex_t btop;
unsigned char pseudo_link;
struct dentry *dentry;
struct inode *inode;
err = 0;
dentry = file->f_path.dentry;
inode = d_inode(dentry);
sigen = au_sigen(dentry->d_sb);
fi_write_lock_nested(file, fi_lsc);
figen = au_figen(file);
if (!fi_lsc)
di_write_lock_child(dentry);
else
di_write_lock_child2(dentry);
btop = au_dbtop(dentry);
pseudo_link = (btop != au_ibtop(inode));
if (sigen == figen && !pseudo_link && au_fbtop(file) == btop) {
if (!wlock) {
di_downgrade_lock(dentry, AuLock_IR);
fi_downgrade_lock(file);
}
goto out; /* success */
}
AuDbg("sigen %d, figen %d\n", sigen, figen);
if (au_digen_test(dentry, sigen)) {
err = au_reval_dpath(dentry, sigen);
AuDebugOn(!err && au_digen_test(dentry, sigen));
}
if (!err)
err = refresh_file(file, reopen);
if (!err) {
if (!wlock) {
di_downgrade_lock(dentry, AuLock_IR);
fi_downgrade_lock(file);
}
} else {
/* the caller gets no lock on failure */
di_write_unlock(dentry);
fi_write_unlock(file);
}
out:
return err;
}
/* ---------------------------------------------------------------------- */
/* cf. aufs_nopage() */
/* for madvise(2) */
/*
 * The ->readpage() callback of aufs_aop. It reads nothing: the page is
 * simply unlocked and 0 is returned. @file is unused.
 */
static int aufs_readpage(struct file *file __maybe_unused, struct page *page)
{
unlock_page(page);
return 0;
}
/*
 * it will never be called, but necessary to support O_DIRECT.
 * Reaching it is a bug, hence BUG().
 */
static ssize_t aufs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
{ BUG(); return 0; }
/*
 * they will never be called.
 * These stubs exist only under CONFIG_AUFS_DEBUG. Each of them just calls
 * AuUnsupport() and returns a constant, so that an unexpected call of an
 * address_space operation gets reported.
 */
#ifdef CONFIG_AUFS_DEBUG
static int aufs_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata)
{ AuUnsupport(); return 0; }
static int aufs_write_end(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
struct page *page, void *fsdata)
{ AuUnsupport(); return 0; }
static int aufs_writepage(struct page *page, struct writeback_control *wbc)
{ AuUnsupport(); return 0; }
static int aufs_set_page_dirty(struct page *page)
{ AuUnsupport(); return 0; }
static void aufs_invalidatepage(struct page *page, unsigned int offset,
unsigned int length)
{ AuUnsupport(); }
static int aufs_releasepage(struct page *page, gfp_t gfp)
{ AuUnsupport(); return 0; }
/* compiled out, and the matching .migratepage entry is commented out too */
#if 0 /* called by memory compaction regardless file */
static int aufs_migratepage(struct address_space *mapping, struct page *newpage,
struct page *page, enum migrate_mode mode)
{ AuUnsupport(); return 0; }
#endif
static bool aufs_isolate_page(struct page *page, isolate_mode_t mode)
{ AuUnsupport(); return true; }
static void aufs_putback_page(struct page *page)
{ AuUnsupport(); }
static int aufs_launder_page(struct page *page)
{ AuUnsupport(); return 0; }
static int aufs_is_partially_uptodate(struct page *page,
unsigned long from,
unsigned long count)
{ AuUnsupport(); return 0; }
static void aufs_is_dirty_writeback(struct page *page, bool *dirty,
bool *writeback)
{ AuUnsupport(); }
static int aufs_error_remove_page(struct address_space *mapping,
struct page *page)
{ AuUnsupport(); return 0; }
static int aufs_swap_activate(struct swap_info_struct *sis, struct file *file,
sector_t *span)
{ AuUnsupport(); return 0; }
static void aufs_swap_deactivate(struct file *file)
{ AuUnsupport(); }
#endif /* CONFIG_AUFS_DEBUG */
/*
 * The address_space operations of aufs.
 * Only ->readpage() and ->direct_IO() are always set. Under
 * CONFIG_AUFS_DEBUG the other callbacks point to the stubs which call
 * AuUnsupport().
 */
const struct address_space_operations aufs_aop = {
.readpage = aufs_readpage,
.direct_IO = aufs_direct_IO,
#ifdef CONFIG_AUFS_DEBUG
.writepage = aufs_writepage,
/* no writepages, because of writepage */
.set_page_dirty = aufs_set_page_dirty,
/* no readpages, because of readpage */
.write_begin = aufs_write_begin,
.write_end = aufs_write_end,
/* no bmap, no block device */
.invalidatepage = aufs_invalidatepage,
.releasepage = aufs_releasepage,
/* is fallback_migrate_page ok? */
/* .migratepage = aufs_migratepage, */
.isolate_page = aufs_isolate_page,
.putback_page = aufs_putback_page,
.launder_page = aufs_launder_page,
.is_partially_uptodate = aufs_is_partially_uptodate,
.is_dirty_writeback = aufs_is_dirty_writeback,
.error_remove_page = aufs_error_remove_page,
.swap_activate = aufs_swap_activate,
.swap_deactivate = aufs_swap_deactivate
#endif /* CONFIG_AUFS_DEBUG */
};

342
fs/aufs/file.h Normal file
View File

@ -0,0 +1,342 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2005-2020 Junjiro R. Okajima
*
* This program, aufs is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* file operations
*/
#ifndef __AUFS_FILE_H__
#define __AUFS_FILE_H__
#ifdef __KERNEL__
#include <linux/file.h>
#include <linux/fs.h>
#include <linux/mm_types.h>
#include <linux/poll.h>
#include "rwsem.h"
struct au_branch;
/* an opened lower file and the branch it is accounted to (br_nfiles) */
struct au_hfile {
struct file *hf_file;
struct au_branch *hf_br;
};
struct au_vdir;
/* the lower files of an opened directory, allocated by au_fidir_alloc() */
struct au_fidir {
/* the bottom index in use, -1 right after the allocation */
aufs_bindex_t fd_bbot;
/* the number of the allocated entries of fd_hfile[] */
aufs_bindex_t fd_nent;
struct au_vdir *fd_vdir_cache;
/* indexed by the branch index */
struct au_hfile fd_hfile[];
};
/*
 * Return the size in bytes of a struct au_fidir which has @nent entries
 * in its trailing fd_hfile[] array. @nent must not be negative.
 */
static inline int au_fidir_sz(int nent)
{
	const size_t hdr = sizeof(struct au_fidir);
	const size_t ent = sizeof(struct au_hfile);

	AuDebugOn(nent < 0);
	return hdr + ent * nent;
}
/* the aufs private data of an opened file, see au_fi() */
struct au_finfo {
/* compared with the generation of the superblock to detect a stale file */
atomic_t fi_generation;
struct au_rwsem fi_rwsem;
/* the top branch index, -1 right after au_finfo_init() */
aufs_bindex_t fi_btop;
/* do not union them */
struct { /* for non-dir */
struct au_hfile fi_htop;
/* a counter, see au_set_mmapped() and au_unset_mmapped() */
atomic_t fi_mmapped;
};
struct au_fidir *fi_hdir; /* for dir only */
/* the node in the si_files list of the superblock */
struct hlist_bl_node fi_hlist;
struct file *fi_file; /* very ugly */
struct rcu_head rcu;
} ____cacheline_aligned_in_smp;
/* ---------------------------------------------------------------------- */
/* file.c */
extern const struct address_space_operations aufs_aop;
unsigned int au_file_roflags(unsigned int flags);
struct file *au_h_open(struct dentry *dentry, aufs_bindex_t bindex, int flags,
struct file *file, int force_wr);
/* the arguments for au_do_open() */
struct au_do_open_args {
/* non-zero: run with lockdep switched off and pass h_file to ->open() */
int aopen;
/* the callback which actually opens the lower file(s) */
int (*open)(struct file *file, int flags,
struct file *h_file);
/* handed to au_finfo_init() as fi_hdir */
struct au_fidir *fidir;
/* handed to ->open() only when aopen is set */
struct file *h_file;
};
int au_do_open(struct file *file, struct au_do_open_args *args);
int au_reopen_nondir(struct file *file);
struct au_pin;
int au_ready_to_write(struct file *file, loff_t len, struct au_pin *pin);
int au_reval_and_lock_fdi(struct file *file, int (*reopen)(struct file *file),
int wlock, unsigned int fi_lsc);
int au_do_flush(struct file *file, fl_owner_t id,
int (*flush)(struct file *file, fl_owner_t id));
/* poll.c */
#ifdef CONFIG_AUFS_POLL
__poll_t aufs_poll(struct file *file, struct poll_table_struct *pt);
#endif
/*
 * Without CONFIG_AUFS_BR_HFSPLUS, au_h_open_pre() is a stub which returns
 * NULL and au_h_open_post() is a stub which does nothing.
 */
#ifdef CONFIG_AUFS_BR_HFSPLUS
/* hfsplus.c */
struct file *au_h_open_pre(struct dentry *dentry, aufs_bindex_t bindex,
int force_wr);
void au_h_open_post(struct dentry *dentry, aufs_bindex_t bindex,
struct file *h_file);
#else
AuStub(struct file *, au_h_open_pre, return NULL, struct dentry *dentry,
aufs_bindex_t bindex, int force_wr)
AuStubVoid(au_h_open_post, struct dentry *dentry, aufs_bindex_t bindex,
struct file *h_file);
#endif
/* f_op.c */
extern const struct file_operations aufs_file_fop;
int au_do_open_nondir(struct file *file, int flags, struct file *h_file);
int aufs_release_nondir(struct inode *inode __maybe_unused, struct file *file);
struct file *au_read_pre(struct file *file, int keep_fi, unsigned int lsc);
/* finfo.c */
/* put a lower file and clear its slot */
void au_hfput(struct au_hfile *hf, int execed);
/* replace the lower file at bindex, NULL just closes the current one */
void au_set_h_fptr(struct file *file, aufs_bindex_t bindex,
struct file *h_file);
void au_update_figen(struct file *file);
struct au_fidir *au_fidir_alloc(struct super_block *sb);
int au_fidir_realloc(struct au_finfo *finfo, int nbr, int may_shrink);
void au_fi_init_once(void *_fi);
void au_finfo_fin(struct file *file);
/* on success, fi_rwsem of the new private data is left write-locked */
int au_finfo_init(struct file *file, struct au_fidir *fidir);
/* ioctl.c */
long aufs_ioctl_nondir(struct file *file, unsigned int cmd, unsigned long arg);
#ifdef CONFIG_COMPAT
long aufs_compat_ioctl_dir(struct file *file, unsigned int cmd,
unsigned long arg);
long aufs_compat_ioctl_nondir(struct file *file, unsigned int cmd,
unsigned long arg);
#endif
/* ---------------------------------------------------------------------- */
/* Return the aufs private data of @file, which is kept in ->private_data. */
static inline struct au_finfo *au_fi(struct file *file)
{
return file->private_data;
}
/* ---------------------------------------------------------------------- */
/* the lock operations on fi_rwsem of the private data of the file 'f' */
#define fi_read_lock(f) au_rw_read_lock(&au_fi(f)->fi_rwsem)
#define fi_write_lock(f) au_rw_write_lock(&au_fi(f)->fi_rwsem)
#define fi_read_trylock(f) au_rw_read_trylock(&au_fi(f)->fi_rwsem)
#define fi_write_trylock(f) au_rw_write_trylock(&au_fi(f)->fi_rwsem)
/*
#define fi_read_trylock_nested(f) \
au_rw_read_trylock_nested(&au_fi(f)->fi_rwsem)
#define fi_write_trylock_nested(f) \
au_rw_write_trylock_nested(&au_fi(f)->fi_rwsem)
*/
#define fi_read_unlock(f) au_rw_read_unlock(&au_fi(f)->fi_rwsem)
#define fi_write_unlock(f) au_rw_write_unlock(&au_fi(f)->fi_rwsem)
/* convert a held write lock into a read lock */
#define fi_downgrade_lock(f) au_rw_dgrade_lock(&au_fi(f)->fi_rwsem)
/*
 * lock subclass for finfo, passed as 'lsc' to fi_read_lock_nested() and
 * fi_write_lock_nested()
 */
enum {
AuLsc_FI_1,
AuLsc_FI_2
};
/* Read-lock fi_rwsem of @f with the lock subclass @lsc. */
static inline void fi_read_lock_nested(struct file *f, unsigned int lsc)
{
au_rw_read_lock_nested(&au_fi(f)->fi_rwsem, lsc);
}
/* Write-lock fi_rwsem of @f with the lock subclass @lsc. */
static inline void fi_write_lock_nested(struct file *f, unsigned int lsc)
{
au_rw_write_lock_nested(&au_fi(f)->fi_rwsem, lsc);
}
/*
 * fi_read_lock_1, fi_write_lock_1,
 * fi_read_lock_2, fi_write_lock_2
 *
 * They are generated by the helper macros below. Each of them calls the
 * _nested variant with the subclass AuLsc_FI_<name>. The helper macros
 * are undefined right after the use.
 */
#define AuReadLockFunc(name) \
static inline void fi_read_lock_##name(struct file *f) \
{ fi_read_lock_nested(f, AuLsc_FI_##name); }
#define AuWriteLockFunc(name) \
static inline void fi_write_lock_##name(struct file *f) \
{ fi_write_lock_nested(f, AuLsc_FI_##name); }
#define AuRWLockFuncs(name) \
AuReadLockFunc(name) \
AuWriteLockFunc(name)
AuRWLockFuncs(1);
AuRWLockFuncs(2);
#undef AuReadLockFunc
#undef AuWriteLockFunc
#undef AuRWLockFuncs
/* the lock state assertions on fi_rwsem of the private data of the file 'f' */
#define FiMustNoWaiters(f) AuRwMustNoWaiters(&au_fi(f)->fi_rwsem)
#define FiMustAnyLock(f) AuRwMustAnyLock(&au_fi(f)->fi_rwsem)
#define FiMustWriteLock(f) AuRwMustWriteLock(&au_fi(f)->fi_rwsem)
/* ---------------------------------------------------------------------- */
/* todo: hard/soft set? */
static inline aufs_bindex_t au_fbtop(struct file *file)
{
FiMustAnyLock(file);
return au_fi(file)->fi_btop;
}
static inline aufs_bindex_t au_fbbot_dir(struct file *file)
{
FiMustAnyLock(file);
AuDebugOn(!au_fi(file)->fi_hdir);
return au_fi(file)->fi_hdir->fd_bbot;
}
static inline struct au_vdir *au_fvdir_cache(struct file *file)
{
FiMustAnyLock(file);
AuDebugOn(!au_fi(file)->fi_hdir);
return au_fi(file)->fi_hdir->fd_vdir_cache;
}
/* Set the top branch index of @file. The file info must be write-locked. */
static inline void au_set_fbtop(struct file *file, aufs_bindex_t bindex)
{
	struct au_finfo *finfo = au_fi(file);

	FiMustWriteLock(file);
	finfo->fi_btop = bindex;
}
/*
 * Set the bottom branch index of the directory @file.
 * The file info must be write-locked, and fi_hdir must be set.
 */
static inline void au_set_fbbot_dir(struct file *file, aufs_bindex_t bindex)
{
	struct au_finfo *finfo = au_fi(file);

	FiMustWriteLock(file);
	AuDebugOn(!finfo->fi_hdir);
	finfo->fi_hdir->fd_bbot = bindex;
}
static inline void au_set_fvdir_cache(struct file *file,
struct au_vdir *vdir_cache)
{
FiMustWriteLock(file);
AuDebugOn(!au_fi(file)->fi_hdir);
au_fi(file)->fi_hdir->fd_vdir_cache = vdir_cache;
}
static inline struct file *au_hf_top(struct file *file)
{
FiMustAnyLock(file);
AuDebugOn(au_fi(file)->fi_hdir);
return au_fi(file)->fi_htop.hf_file;
}
/*
 * Return the lower file of the directory @file on the branch @bindex.
 * The file info must be locked, and fi_hdir must be set.
 */
static inline struct file *au_hf_dir(struct file *file, aufs_bindex_t bindex)
{
	struct au_finfo *finfo = au_fi(file);

	FiMustAnyLock(file);
	AuDebugOn(!finfo->fi_hdir);
	return finfo->fi_hdir->fd_hfile[bindex].hf_file;
}
/* todo: memory barrier? */
static inline unsigned int au_figen(struct file *f)
{
return atomic_read(&au_fi(f)->fi_generation);
}
/*
 * Increment the mmapped counter of @f.
 * If the incremented counter becomes zero, it has wrapped around: a
 * warning is printed, and the counter is incremented again until it is
 * non-zero so that au_test_mmapped() keeps returning true.
 */
static inline void au_set_mmapped(struct file *f)
{
if (atomic_inc_return(&au_fi(f)->fi_mmapped))
return;
pr_warn("fi_mmapped wrapped around\n");
while (!atomic_inc_return(&au_fi(f)->fi_mmapped))
;
}
/* Decrement the mmapped counter of @f. */
static inline void au_unset_mmapped(struct file *f)
{
atomic_dec(&au_fi(f)->fi_mmapped);
}
/* Return the mmapped counter of @f; non-zero means the file is mmapped. */
static inline int au_test_mmapped(struct file *f)
{
return atomic_read(&au_fi(f)->fi_mmapped);
}
/*
 * customize vma->vm_file
 * Make @vma refer to @file. A new reference to @file is taken before the
 * one to the previous file is put.
 */
static inline void au_do_vm_file_reset(struct vm_area_struct *vma,
				       struct file *file)
{
	struct file *prev = vma->vm_file;

	get_file(file);
	vma->vm_file = file;
	fput(prev);
}
/*
 * au_vm_file_reset() replaces the file of @vma by @file.
 * With CONFIG_MMU only vma->vm_file is replaced and AuDbgVmRegion() is a
 * no-op. Without it, vma->vm_region->vm_file is replaced in the same way,
 * and AuDbgVmRegion() asserts that the file of the region is the given
 * one.
 */
#ifdef CONFIG_MMU
#define AuDbgVmRegion(file, vma) do {} while (0)
static inline void au_vm_file_reset(struct vm_area_struct *vma,
struct file *file)
{
au_do_vm_file_reset(vma, file);
}
#else
#define AuDbgVmRegion(file, vma) \
AuDebugOn((vma)->vm_region && (vma)->vm_region->vm_file != (file))
static inline void au_vm_file_reset(struct vm_area_struct *vma,
struct file *file)
{
struct file *f;
au_do_vm_file_reset(vma, file);
/* the region keeps its own reference to the file */
f = vma->vm_region->vm_file;
get_file(file);
vma->vm_region->vm_file = file;
fput(f);
}
#endif /* CONFIG_MMU */
/*
 * handle vma->vm_prfile
 * Store @file in vma->vm_prfile with a new reference. Without CONFIG_MMU,
 * it is stored in vma->vm_region->vm_prfile too, with another reference.
 * The previous values are overwritten without being put.
 * NOTE(review): presumably they are always NULL at this point -- confirm
 * against the callers.
 */
static inline void au_vm_prfile_set(struct vm_area_struct *vma,
struct file *file)
{
get_file(file);
vma->vm_prfile = file;
#ifndef CONFIG_MMU
get_file(file);
vma->vm_region->vm_prfile = file;
#endif
}
#endif /* __KERNEL__ */
#endif /* __AUFS_FILE_H__ */

149
fs/aufs/finfo.c Normal file
View File

@ -0,0 +1,149 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2005-2020 Junjiro R. Okajima
*
* This program, aufs is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* file private data
*/
#include "aufs.h"
void au_hfput(struct au_hfile *hf, int execed)
{
if (execed)
allow_write_access(hf->hf_file);
fput(hf->hf_file);
hf->hf_file = NULL;
au_lcnt_dec(&hf->hf_br->br_nfiles);
hf->hf_br = NULL;
}
/*
 * Set the lower file of @file on the branch @bindex to @val.
 *
 * For a non-directory (fi_hdir is NULL) the single slot fi_htop is used
 * and @bindex must be the top branch index; for a directory the slot is
 * fd_hfile[@bindex]. The lower file currently held in the slot is put
 * first. When @val is NULL the slot is just left empty; otherwise the
 * file info must be write-locked, and the branch is recorded along with
 * the file.
 */
void au_set_h_fptr(struct file *file, aufs_bindex_t bindex, struct file *val)
{
	struct au_finfo *finfo = au_fi(file);
	struct au_fidir *fidir = finfo->fi_hdir;
	struct au_hfile *hf;

	if (fidir) {
		hf = fidir->fd_hfile + bindex;
	} else {
		AuDebugOn(finfo->fi_btop != bindex);
		hf = &finfo->fi_htop;
	}

	/* release the lower file which occupies the slot now */
	if (hf && hf->hf_file)
		au_hfput(hf, vfsub_file_execed(file));

	if (!val)
		return;

	FiMustWriteLock(file);
	AuDebugOn(IS_ERR_OR_NULL(file->f_path.dentry));
	hf->hf_file = val;
	hf->hf_br = au_sbr(file->f_path.dentry->d_sb, bindex);
}
void au_update_figen(struct file *file)
{
atomic_set(&au_fi(file)->fi_generation, au_digen(file->f_path.dentry));
/* smp_mb(); */ /* atomic_set */
}
/* ---------------------------------------------------------------------- */
struct au_fidir *au_fidir_alloc(struct super_block *sb)
{
struct au_fidir *fidir;
int nbr;
nbr = au_sbbot(sb) + 1;
if (nbr < 2)
nbr = 2; /* initial allocate for 2 branches */
fidir = kzalloc(au_fidir_sz(nbr), GFP_NOFS);
if (fidir) {
fidir->fd_bbot = -1;
fidir->fd_nent = nbr;
}
return fidir;
}
/*
 * Resize finfo->fi_hdir so that it has @nbr entries.
 *
 * @may_shrink: passed through to au_kzrealloc().
 * The file info must be write-locked and fi_hdir must be set.
 * Returns 0 on success. On failure -ENOMEM is returned and fi_hdir is
 * left untouched.
 */
int au_fidir_realloc(struct au_finfo *finfo, int nbr, int may_shrink)
{
	struct au_fidir *cur, *resized;

	AuRwMustWriteLock(&finfo->fi_rwsem);

	cur = finfo->fi_hdir;
	AuDebugOn(!cur);

	resized = au_kzrealloc(cur, au_fidir_sz(cur->fd_nent),
			       au_fidir_sz(nbr), GFP_NOFS, may_shrink);
	if (!resized)
		return -ENOMEM;

	resized->fd_nent = nbr;
	finfo->fi_hdir = resized;
	return 0;
}
/* ---------------------------------------------------------------------- */
void au_finfo_fin(struct file *file)
{
struct au_finfo *finfo;
au_lcnt_dec(&au_sbi(file->f_path.dentry->d_sb)->si_nfiles);
finfo = au_fi(file);
AuDebugOn(finfo->fi_hdir);
AuRwDestroy(&finfo->fi_rwsem);
au_cache_free_finfo(finfo);
}
/*
 * Initialize fi_rwsem of the struct au_finfo given as @_finfo.
 * NOTE(review): the name and the void pointer argument suggest that this
 * is the constructor of the finfo cache -- confirm where it is
 * registered.
 */
void au_fi_init_once(void *_finfo)
{
struct au_finfo *finfo = _finfo;
au_rw_init(&finfo->fi_rwsem);
}
/*
 * Allocate the private data of @file and attach it to ->private_data.
 *
 * @fidir: stored as fi_hdir as it is.
 * The top branch index starts at -1, the generation is taken from the
 * dentry, and the si_nfiles counter of the superblock is incremented.
 * fi_rwsem is left write-locked on a successful return.
 * Returns 0, or -ENOMEM when the allocation fails.
 */
int au_finfo_init(struct file *file, struct au_fidir *fidir)
{
	struct dentry *dentry = file->f_path.dentry;
	struct au_finfo *finfo;

	finfo = au_cache_alloc_finfo();
	if (unlikely(!finfo))
		return -ENOMEM;

	au_lcnt_inc(&au_sbi(dentry->d_sb)->si_nfiles);
	au_rw_write_lock(&finfo->fi_rwsem);
	finfo->fi_btop = -1;
	finfo->fi_hdir = fidir;
	atomic_set(&finfo->fi_generation, au_digen(dentry));
	/* smp_mb(); */ /* atomic_set */

	file->private_data = finfo;
	return 0;
}

401
fs/aufs/fstype.h Normal file
View File

@ -0,0 +1,401 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2005-2020 Junjiro R. Okajima
*
* This program, aufs is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* judging filesystem type
*/
#ifndef __AUFS_FSTYPE_H__
#define __AUFS_FSTYPE_H__
#ifdef __KERNEL__
#include <linux/fs.h>
#include <linux/magic.h>
#include <linux/nfs_fs.h>
#include <linux/romfs_fs.h>
/* Return 1 when @sb carries the aufs superblock magic, otherwise 0. */
static inline int au_test_aufs(struct super_block *sb)
{
	int match;

	match = (sb->s_magic == AUFS_SUPER_MAGIC);
	return match;
}
/* Return the filesystem type name recorded in @sb->s_type. */
static inline const char *au_sbtype(struct super_block *sb)
{
	const char *type_name = sb->s_type->name;

	return type_name;
}
/*
 * Return 1 when @sb has the isofs magic; always 0 when CONFIG_ISO9660_FS
 * is not enabled.
 */
static inline int au_test_iso9660(struct super_block *sb __maybe_unused)
{
	int match = 0;

#if IS_ENABLED(CONFIG_ISO9660_FS)
	match = (sb->s_magic == ISOFS_SUPER_MAGIC);
#endif
	return match;
}
/*
 * Return 1 when @sb has the romfs magic; always 0 when CONFIG_ROMFS_FS
 * is not enabled.
 */
static inline int au_test_romfs(struct super_block *sb __maybe_unused)
{
	int match = 0;

#if IS_ENABLED(CONFIG_ROMFS_FS)
	match = (sb->s_magic == ROMFS_MAGIC);
#endif
	return match;
}
/*
 * Return 1 when @sb has the cramfs magic; always 0 when CONFIG_CRAMFS
 * is not enabled.
 *
 * The disabled case lives in an #else branch, like every other predicate
 * in this header, so that no unreachable "return 0" is left behind the
 * enabled return.
 */
static inline int au_test_cramfs(struct super_block *sb __maybe_unused)
{
#if IS_ENABLED(CONFIG_CRAMFS)
	return sb->s_magic == CRAMFS_MAGIC;
#else
	return 0;
#endif
}
/*
 * Return 1 when @sb has the NFS magic; always 0 when CONFIG_NFS_FS
 * is not enabled.
 */
static inline int au_test_nfs(struct super_block *sb __maybe_unused)
{
	int match = 0;

#if IS_ENABLED(CONFIG_NFS_FS)
	match = (sb->s_magic == NFS_SUPER_MAGIC);
#endif
	return match;
}
/*
 * Return 1 when @sb has the FUSE magic; always 0 when CONFIG_FUSE_FS
 * is not enabled.
 */
static inline int au_test_fuse(struct super_block *sb __maybe_unused)
{
	int match = 0;

#if IS_ENABLED(CONFIG_FUSE_FS)
	match = (sb->s_magic == FUSE_SUPER_MAGIC);
#endif
	return match;
}
/*
 * Return 1 when @sb has the XFS magic; always 0 when CONFIG_XFS_FS
 * is not enabled.
 */
static inline int au_test_xfs(struct super_block *sb __maybe_unused)
{
	int match = 0;

#if IS_ENABLED(CONFIG_XFS_FS)
	match = (sb->s_magic == XFS_SB_MAGIC);
#endif
	return match;
}
/*
 * Return 1 when @sb has the tmpfs magic; always 0 when CONFIG_TMPFS
 * is not defined.
 */
static inline int au_test_tmpfs(struct super_block *sb __maybe_unused)
{
	int match = 0;

#ifdef CONFIG_TMPFS
	match = (sb->s_magic == TMPFS_MAGIC);
#endif
	return match;
}
/*
 * Return 1 when the filesystem type name of @sb is exactly "ecryptfs";
 * always 0 when CONFIG_ECRYPT_FS is not enabled.  Unlike the other
 * predicates this one compares the type name, not s_magic.
 */
static inline int au_test_ecryptfs(struct super_block *sb __maybe_unused)
{
	int match = 0;

#if IS_ENABLED(CONFIG_ECRYPT_FS)
	match = (strcmp(au_sbtype(sb), "ecryptfs") == 0);
#endif
	return match;
}
/* Return 1 when @sb has the ramfs magic, otherwise 0 (not config-gated). */
static inline int au_test_ramfs(struct super_block *sb)
{
	int match;

	match = (sb->s_magic == RAMFS_MAGIC);
	return match;
}
/*
 * Return 1 when @sb has the UBIFS magic; always 0 when CONFIG_UBIFS_FS
 * is not enabled.
 */
static inline int au_test_ubifs(struct super_block *sb __maybe_unused)
{
	int match = 0;

#if IS_ENABLED(CONFIG_UBIFS_FS)
	match = (sb->s_magic == UBIFS_SUPER_MAGIC);
#endif
	return match;
}
/*
 * Return 1 when @sb has the procfs magic; always 0 when CONFIG_PROC_FS
 * is not defined.
 */
static inline int au_test_procfs(struct super_block *sb __maybe_unused)
{
	int match = 0;

#ifdef CONFIG_PROC_FS
	match = (sb->s_magic == PROC_SUPER_MAGIC);
#endif
	return match;
}
/*
 * Return 1 when @sb has the sysfs magic; always 0 when CONFIG_SYSFS
 * is not defined.
 */
static inline int au_test_sysfs(struct super_block *sb __maybe_unused)
{
	int match = 0;

#ifdef CONFIG_SYSFS
	match = (sb->s_magic == SYSFS_MAGIC);
#endif
	return match;
}
/*
 * Return 1 when @sb has the configfs magic; always 0 when
 * CONFIG_CONFIGFS_FS is not enabled.
 */
static inline int au_test_configfs(struct super_block *sb __maybe_unused)
{
	int match = 0;

#if IS_ENABLED(CONFIG_CONFIGFS_FS)
	match = (sb->s_magic == CONFIGFS_MAGIC);
#endif
	return match;
}
/*
 * Return 1 when @sb carries any of the five minix superblock magics
 * (v1, v1 with long names, v2, v2 with long names, v3); always 0 when
 * CONFIG_MINIX_FS is not enabled.
 */
static inline int au_test_minix(struct super_block *sb __maybe_unused)
{
#if IS_ENABLED(CONFIG_MINIX_FS)
	switch (sb->s_magic) {
	case MINIX_SUPER_MAGIC:
	case MINIX_SUPER_MAGIC2:
	case MINIX2_SUPER_MAGIC:
	case MINIX2_SUPER_MAGIC2:
	case MINIX3_SUPER_MAGIC:
		return 1;
	default:
		return 0;
	}
#else
	return 0;
#endif
}
/*
 * Return 1 when @sb has the MSDOS/FAT magic; always 0 when CONFIG_FAT_FS
 * is not enabled.
 */
static inline int au_test_fat(struct super_block *sb __maybe_unused)
{
	int match = 0;

#if IS_ENABLED(CONFIG_FAT_FS)
	match = (sb->s_magic == MSDOS_SUPER_MAGIC);
#endif
	return match;
}
/* msdos is identified exactly like FAT: this simply forwards to au_test_fat(). */
static inline int au_test_msdos(struct super_block *sb)
{
	int is_fat = au_test_fat(sb);

	return is_fat;
}
/* vfat is identified exactly like FAT: this simply forwards to au_test_fat(). */
static inline int au_test_vfat(struct super_block *sb)
{
	int is_fat = au_test_fat(sb);

	return is_fat;
}
/*
 * Return 1 when @sb has the securityfs magic; always 0 when
 * CONFIG_SECURITYFS is not defined.
 */
static inline int au_test_securityfs(struct super_block *sb __maybe_unused)
{
	int match = 0;

#ifdef CONFIG_SECURITYFS
	match = (sb->s_magic == SECURITYFS_MAGIC);
#endif
	return match;
}
/*
 * Return 1 when @sb has the squashfs magic; always 0 when CONFIG_SQUASHFS
 * is not enabled.
 */
static inline int au_test_squashfs(struct super_block *sb __maybe_unused)
{
	int match = 0;

#if IS_ENABLED(CONFIG_SQUASHFS)
	match = (sb->s_magic == SQUASHFS_MAGIC);
#endif
	return match;
}
/*
 * Return 1 when @sb has the btrfs magic; always 0 when CONFIG_BTRFS_FS
 * is not enabled.
 */
static inline int au_test_btrfs(struct super_block *sb __maybe_unused)
{
	int match = 0;

#if IS_ENABLED(CONFIG_BTRFS_FS)
	match = (sb->s_magic == BTRFS_SUPER_MAGIC);
#endif
	return match;
}
/*
 * Return 1 when @sb has the xenfs magic; always 0 when CONFIG_XENFS
 * is not enabled.
 */
static inline int au_test_xenfs(struct super_block *sb __maybe_unused)
{
	int match = 0;

#if IS_ENABLED(CONFIG_XENFS)
	match = (sb->s_magic == XENFS_SUPER_MAGIC);
#endif
	return match;
}
/*
 * Return 1 when @sb has the debugfs magic; always 0 when CONFIG_DEBUG_FS
 * is not defined.
 */
static inline int au_test_debugfs(struct super_block *sb __maybe_unused)
{
	int match = 0;

#ifdef CONFIG_DEBUG_FS
	match = (sb->s_magic == DEBUGFS_MAGIC);
#endif
	return match;
}
/*
 * Return 1 when @sb has the NILFS magic; always 0 when NILFS2 support is
 * not enabled.
 *
 * The Kconfig symbol of the NILFS2 filesystem is CONFIG_NILFS2_FS.  The
 * previous guard tested CONFIG_NILFS, which is not defined by the kernel,
 * so this predicate always returned 0 and au_test_fs_bad_xino() never
 * rejected a nilfs superblock.
 */
static inline int au_test_nilfs(struct super_block *sb __maybe_unused)
{
#if IS_ENABLED(CONFIG_NILFS2_FS)
	return sb->s_magic == NILFS_SUPER_MAGIC;
#else
	return 0;
#endif
}
/*
 * Return 1 when @sb has the hfsplus magic; always 0 when CONFIG_HFSPLUS_FS
 * is not enabled.
 */
static inline int au_test_hfsplus(struct super_block *sb __maybe_unused)
{
	int match = 0;

#if IS_ENABLED(CONFIG_HFSPLUS_FS)
	match = (sb->s_magic == HFSPLUS_SUPER_MAGIC);
#endif
	return match;
}
/* ---------------------------------------------------------------------- */
/*
 * Filesystems that cannot be used as an aufs branch.
 *
 * Returns 1 for procfs, sysfs, configfs, debugfs, securityfs, xenfs,
 * ecryptfs and aufs itself.  ramfs is rejected as well unless
 * CONFIG_AUFS_BR_RAMFS is defined.  Returns 0 otherwise.
 */
static inline int au_test_fs_unsuppoted(struct super_block *sb)
{
#ifndef CONFIG_AUFS_BR_RAMFS
	if (au_test_ramfs(sb))
		return 1;
#endif
	if (au_test_procfs(sb) || au_test_sysfs(sb))
		return 1;
	if (au_test_configfs(sb) || au_test_debugfs(sb))
		return 1;
	if (au_test_securityfs(sb) || au_test_xenfs(sb))
		return 1;
	if (au_test_ecryptfs(sb))
		return 1;
	/* if (!strcmp(au_sbtype(sb), "unionfs")) return 1; */

	/* will be supported in next version */
	return !!au_test_aufs(sb);
}
/*
 * Heuristic for "remote" filesystems: anything whose type does not set
 * FS_REQUIRES_DEV, except tmpfs (and ramfs when CONFIG_AUFS_BR_RAMFS is
 * defined), is reported as remote.
 */
static inline int au_test_fs_remote(struct super_block *sb)
{
	if (au_test_tmpfs(sb))
		return 0;
#ifdef CONFIG_AUFS_BR_RAMFS
	if (au_test_ramfs(sb))
		return 0;
#endif
	return !(sb->s_type->fs_flags & FS_REQUIRES_DEV);
}
/* ---------------------------------------------------------------------- */
/*
* Note: these functions (below) are created after reading ->getattr() in all
* filesystems under linux/fs. it means we have to do so in every update...
*/
/*
 * Some filesystems require getattr to refresh the inode attributes before
 * they are referenced.
 * In most cases we can rely on the inode attributes in NFS (or every remote
 * fs) and leave the work to d_revalidate().
 *
 * Returns 1 for NFS and FUSE, 0 otherwise.
 */
static inline int au_test_fs_refresh_iattr(struct super_block *sb)
{
	if (au_test_nfs(sb))
		return 1;
	/* au_test_btrfs(sb) is not consulted: untested */
	return !!au_test_fuse(sb);
}
/*
 * Filesystems which don't maintain i_size or i_blocks.
 *
 * Returns 1 for xfs, btrfs, ubifs and hfsplus, 0 otherwise.
 */
static inline int au_test_fs_bad_iattr_size(struct super_block *sb)
{
	if (au_test_xfs(sb) || au_test_btrfs(sb))
		return 1;
	if (au_test_ubifs(sb))
		return 1;
	/* au_test_minix(sb) is not consulted: untested */

	/* hfsplus: maintained, but incorrect */
	return !!au_test_hfsplus(sb);
}
/*
 * Filesystems which don't store the correct value in some of their inode
 * attributes: everything matched by au_test_fs_bad_iattr_size(), plus the
 * FAT family (fat, msdos, vfat).
 */
static inline int au_test_fs_bad_iattr(struct super_block *sb)
{
	if (au_test_fs_bad_iattr_size(sb))
		return 1;
	if (au_test_fat(sb) || au_test_msdos(sb))
		return 1;
	return !!au_test_vfat(sb);
}
/*
 * Filesystems which don't check i_nlink in link(2): tmpfs, ubifs, hfsplus,
 * and ramfs when CONFIG_AUFS_BR_RAMFS is defined.
 */
static inline int au_test_fs_no_limit_nlink(struct super_block *sb)
{
	if (au_test_tmpfs(sb))
		return 1;
#ifdef CONFIG_AUFS_BR_RAMFS
	if (au_test_ramfs(sb))
		return 1;
#endif
	if (au_test_ubifs(sb))
		return 1;
	return !!au_test_hfsplus(sb);
}
/*
 * Filesystems which set S_NOATIME and S_NOCMTIME: NFS, FUSE and ubifs.
 */
static inline int au_test_fs_notime(struct super_block *sb)
{
	if (au_test_nfs(sb) || au_test_fuse(sb))
		return 1;
	return !!au_test_ubifs(sb);
}
/*
 * Temporary support for i#1 in cramfs: on cramfs, inode number 1 is not
 * treated as unique.  Every inode on any other filesystem is reported
 * as unique.
 */
static inline int au_test_fs_unique_ino(struct inode *inode)
{
	return !au_test_cramfs(inode->i_sb) || inode->i_ino != 1;
}
/* ---------------------------------------------------------------------- */
/*
 * The filesystem where the xino files are placed must support i/o after
 * unlink and maintain i_size and i_blocks.
 *
 * Returns 1 when @sb is NOT suitable: remote filesystems, those with
 * unreliable size attributes, aufs itself, ecryptfs and nilfs.
 */
static inline int au_test_fs_bad_xino(struct super_block *sb)
{
	if (au_test_fs_remote(sb) || au_test_fs_bad_iattr_size(sb))
		return 1;

	/* don't want unnecessary work for xino */
	if (au_test_aufs(sb) || au_test_ecryptfs(sb))
		return 1;
	return !!au_test_nilfs(sb);
}
/* Return 1 when @sb is tmpfs or ramfs, 0 otherwise. */
static inline int au_test_fs_trunc_xino(struct super_block *sb)
{
	if (au_test_tmpfs(sb))
		return 1;
	return !!au_test_ramfs(sb);
}
/*
 * Test if @sb is real-readonly: squashfs, iso9660, cramfs or romfs.
 */
static inline int au_test_fs_rr(struct super_block *sb)
{
	if (au_test_squashfs(sb) || au_test_iso9660(sb))
		return 1;
	if (au_test_cramfs(sb))
		return 1;
	return !!au_test_romfs(sb);
}
/*
 * Test if @inode is on NFS mounted with the 'noacl' option.
 * NFS always sets SB_POSIXACL regardless of its 'noacl' mount option, so
 * the server capability NFS_CAP_ACLS is checked instead.
 */
static inline int au_test_nfs_noacl(struct inode *inode)
{
	if (!au_test_nfs(inode->i_sb))
		return 0;
	/* IS_POSIXACL(inode) is intentionally not tested */
	return !nfs_server_capable(inode, NFS_CAP_ACLS);
}
#endif /* __KERNEL__ */
#endif /* __AUFS_FSTYPE_H__ */

65
fs/aufs/hbl.h Normal file
View File

@ -0,0 +1,65 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2017-2020 Junjiro R. Okajima
*
* This program, aufs is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* helpers for hlist_bl.h
*/
#ifndef __AUFS_HBL_H__
#define __AUFS_HBL_H__
#ifdef __KERNEL__
#include <linux/list_bl.h>
/*
 * Insert @node at the head of the list @hbl.
 * The insertion is wrapped in hlist_bl_lock()/hlist_bl_unlock() on @hbl.
 */
static inline void au_hbl_add(struct hlist_bl_node *node,
struct hlist_bl_head *hbl)
{
hlist_bl_lock(hbl);
hlist_bl_add_head(node, hbl);
hlist_bl_unlock(hbl);
}
/*
 * Unlink @node from its list.
 * @hbl is only used for locking: the removal is wrapped in
 * hlist_bl_lock()/hlist_bl_unlock() on it, so the caller is expected to
 * pass the head of the list @node is actually on.
 */
static inline void au_hbl_del(struct hlist_bl_node *node,
struct hlist_bl_head *hbl)
{
hlist_bl_lock(hbl);
hlist_bl_del(node);
hlist_bl_unlock(hbl);
}
/*
 * Iterate over every node of an hlist_bl list.
 * @pos:  a 'struct hlist_bl_node *' lvalue used as the loop cursor
 * @head: the list head, passed to hlist_bl_first()
 *
 * No locking is done here; the caller is responsible for it.
 * @pos is parenthesized in the expansion so that an lvalue expression such
 * as '*pp' can be used as the cursor.
 */
#define au_hbl_for_each(pos, head)		\
	for ((pos) = hlist_bl_first(head);	\
	     (pos);				\
	     (pos) = (pos)->next)
static inline unsigned long au_hbl_count(struct hlist_bl_head *hbl)
{
unsigned long cnt;
struct hlist_bl_node *pos;
cnt = 0;
hlist_bl_lock(hbl);
au_hbl_for_each(pos, hbl)
cnt++;
hlist_bl_unlock(hbl);
return cnt;
}
#endif /* __KERNEL__ */
#endif /* __AUFS_HBL_H__ */

288
fs/aufs/hfsnotify.c Normal file
View File

@ -0,0 +1,288 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2005-2020 Junjiro R. Okajima
*
* This program, aufs is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* fsnotify for the lower directories
*/
#include "aufs.h"
/*
 * Event mask installed on every aufs fsnotify mark: child create, delete
 * and both halves of a rename.
 * FS_IN_IGNORED is unnecessary
 */
static const __u32 AuHfsnMask = (FS_MOVED_TO | FS_MOVED_FROM | FS_DELETE
| FS_CREATE | FS_EVENT_ON_CHILD);
/* au_hfsn_fin() sleeps here until au_hfsn_ifree drops to zero */
static DECLARE_WAIT_QUEUE_HEAD(au_hfsn_wq);
/*
 * Number of marks handed to fsnotify for destruction (au_hfsn_free()) whose
 * free_mark callback (au_hfsn_free_mark()) has not run yet.
 */
static __cacheline_aligned_in_smp atomic64_t au_hfsn_ifree = ATOMIC64_INIT(0);
/*
 * fsnotify ->free_mark callback (see au_hfsn_ops).
 * Frees the au_hnotify that embeds @mark, then decrements au_hfsn_ifree
 * and wakes up au_hfsn_wq when the counter reaches zero, i.e. when no
 * destroyed mark is pending any more.
 */
static void au_hfsn_free_mark(struct fsnotify_mark *mark)
{
struct au_hnotify *hn = container_of(mark, struct au_hnotify,
hn_mark);
/* AuDbg("here\n"); */
au_cache_free_hnotify(hn);
smp_mb__before_atomic(); /* for atomic64_dec */
if (atomic64_dec_and_test(&au_hfsn_ifree))
wake_up(&au_hfsn_wq);
}
/*
 * Attach the fsnotify mark embedded in hinode->hi_notify to the lower
 * inode hinode->hi_inode.
 *
 * The mark is initialized for the fsnotify group of the branch identified
 * by hinode->hi_id and uses AuHfsnMask as its event mask.
 * Returns the result of fsnotify_add_inode_mark().
 */
static int au_hfsn_alloc(struct au_hinode *hinode)
{
	struct au_hnotify *hn = hinode->hi_notify;
	struct super_block *sb = hn->hn_aufs_inode->i_sb;
	struct au_branch *br = au_sbr(sb, au_br_index(sb, hinode->hi_id));
	struct fsnotify_mark *mark = &hn->hn_mark;
	int err;

	AuDebugOn(!br->br_hfsn);
	fsnotify_init_mark(mark, br->br_hfsn->hfsn_group);
	mark->mask = AuHfsnMask;

	/*
	 * by udba rename or rmdir, aufs assign a new inode to the known
	 * h_inode, so specify 1 to allow dups.
	 */
	lockdep_off();
	err = fsnotify_add_inode_mark(mark, hinode->hi_inode, /*allow_dups*/1);
	lockdep_on();

	return err;
}
/*
 * Detach and destroy the fsnotify mark embedded in @hn.
 * @hinode is not used by this implementation.
 *
 * au_hfsn_ifree is incremented first; the matching decrement happens in
 * au_hfsn_free_mark().  A reference on the mark's group is held across
 * fsnotify_destroy_mark()/fsnotify_put_mark() and dropped afterwards.
 *
 * Always returns 0, which tells au_hn_free() NOT to free @hn itself: the
 * object is released by au_hfsn_free_mark().
 */
static int au_hfsn_free(struct au_hinode *hinode, struct au_hnotify *hn)
{
struct fsnotify_mark *mark;
unsigned long long ull;
struct fsnotify_group *group;
ull = atomic64_inc_return(&au_hfsn_ifree);
/* the counter wrapped around to zero */
BUG_ON(!ull);
mark = &hn->hn_mark;
/* read mark->group and pin the group under the mark lock */
spin_lock(&mark->lock);
group = mark->group;
fsnotify_get_group(group);
spin_unlock(&mark->lock);
lockdep_off();
fsnotify_destroy_mark(mark, group);
fsnotify_put_mark(mark);
fsnotify_put_group(group);
lockdep_on();
/* free hn by myself */
return 0;
}
/* ---------------------------------------------------------------------- */
/*
 * Enable (@do_set != 0) or disable (@do_set == 0) event delivery for the
 * mark of @hinode by setting or clearing AuHfsnMask in mark->mask under
 * the mark lock.  The debug assertions expect the mask to be in the
 * opposite state beforehand.
 */
static void au_hfsn_ctl(struct au_hinode *hinode, int do_set)
{
	struct fsnotify_mark *mark = &hinode->hi_notify->hn_mark;

	spin_lock(&mark->lock);
	if (!do_set) {
		AuDebugOn(!(mark->mask & AuHfsnMask));
		mark->mask &= ~AuHfsnMask;
	} else {
		AuDebugOn(mark->mask & AuHfsnMask);
		mark->mask |= AuHfsnMask;
	}
	spin_unlock(&mark->lock);
	/* fsnotify_recalc_inode_mask(hinode->hi_inode); */
}
/* ---------------------------------------------------------------------- */
/*
 * Debug-only helper, compiled only when AuDbgHnotify is defined (the
 * define below is commented out by default).
 */
/* #define AuDbgHnotify */
#ifdef AuDbgHnotify
/*
 * Return the name of the first FS_* flag (in the order tested below) that
 * is set in @mask, or "" when none matches.  Without CONFIG_AUFS_DEBUG the
 * result is always "??".
 */
static char *au_hfsn_name(u32 mask)
{
#ifdef CONFIG_AUFS_DEBUG
/* return the stringified flag name when @mask contains it */
#define test_ret(flag) \
do { \
if (mask & flag) \
return #flag; \
} while (0)
test_ret(FS_ACCESS);
test_ret(FS_MODIFY);
test_ret(FS_ATTRIB);
test_ret(FS_CLOSE_WRITE);
test_ret(FS_CLOSE_NOWRITE);
test_ret(FS_OPEN);
test_ret(FS_MOVED_FROM);
test_ret(FS_MOVED_TO);
test_ret(FS_CREATE);
test_ret(FS_DELETE);
test_ret(FS_DELETE_SELF);
test_ret(FS_MOVE_SELF);
test_ret(FS_UNMOUNT);
test_ret(FS_Q_OVERFLOW);
test_ret(FS_IN_IGNORED);
test_ret(FS_ISDIR);
test_ret(FS_IN_ONESHOT);
test_ret(FS_EVENT_ON_CHILD);
return "";
#undef test_ret
#else
return "??";
#endif
}
#endif
/* ---------------------------------------------------------------------- */
/*
 * fsnotify ->free_group_priv callback (see au_hfsn_ops).
 * Frees the au_br_hfsnotify stored in group->private by au_hfsn_init_br().
 */
static void au_hfsn_free_group(struct fsnotify_group *group)
{
struct au_br_hfsnotify *hfsn = group->private;
/* AuDbg("here\n"); */
au_kfree_try_rcu(hfsn);
}
/*
 * fsnotify ->handle_event callback (see au_hfsn_ops).
 *
 * @inode is the lower directory the event was raised on and @file_name the
 * name of the affected child.  FS_IN_IGNORED and FS_UNMOUNT events are
 * dropped; everything else is forwarded to au_hnotify() together with the
 * au_hnotify that embeds the inode mark found in @iter_info.  The child
 * inode is always passed as NULL.
 *
 * @group, @data and @cookie are unused; @data_type is only checked by a
 * debug assertion.  Returns 0 for dropped events, otherwise the result of
 * au_hnotify().
 */
static int au_hfsn_handle_event(struct fsnotify_group *group,
struct inode *inode,
u32 mask, const void *data, int data_type,
const struct qstr *file_name, u32 cookie,
struct fsnotify_iter_info *iter_info)
{
int err;
struct au_hnotify *hnotify;
struct inode *h_dir, *h_inode;
struct fsnotify_mark *inode_mark;
AuDebugOn(data_type != FSNOTIFY_EVENT_INODE);
err = 0;
/* if FS_UNMOUNT happens, there must be another bug */
AuDebugOn(mask & FS_UNMOUNT);
if (mask & (FS_IN_IGNORED | FS_UNMOUNT))
goto out;
h_dir = inode;
h_inode = NULL;
/*
 * NOTE(review): the debug block below refers to 'h_child_qstr', which is
 * not a parameter or local of this function (the name is 'file_name'), so
 * it presumably does not build when AuDbgHnotify is defined -- confirm
 * before enabling it.
 */
#ifdef AuDbgHnotify
au_debug_on();
if (1 || h_child_qstr.len != sizeof(AUFS_XINO_FNAME) - 1
|| strncmp(h_child_qstr.name, AUFS_XINO_FNAME, h_child_qstr.len)) {
AuDbg("i%lu, mask 0x%x %s, hcname %.*s, hi%lu\n",
h_dir->i_ino, mask, au_hfsn_name(mask),
AuLNPair(&h_child_qstr), h_inode ? h_inode->i_ino : 0);
/* WARN_ON(1); */
}
au_debug_off();
#endif
/* map the fsnotify mark back to the aufs notify object embedding it */
inode_mark = fsnotify_iter_inode_mark(iter_info);
AuDebugOn(!inode_mark);
hnotify = container_of(inode_mark, struct au_hnotify, hn_mark);
err = au_hnotify(h_dir, hnotify, mask, file_name, h_inode);
out:
return err;
}
/*
 * fsnotify callbacks of the per-branch group created by au_hfsn_init_br():
 * event delivery, release of the group's private data and release of a
 * destroyed mark.
 */
static struct fsnotify_ops au_hfsn_ops = {
.handle_event = au_hfsn_handle_event,
.free_group_priv = au_hfsn_free_group,
.free_mark = au_hfsn_free_mark
};
/* ---------------------------------------------------------------------- */
/*
 * Drop the branch's reference on its fsnotify group, if the branch has
 * one.  br->br_hfsn itself is not modified here.
 */
static void au_hfsn_fin_br(struct au_branch *br)
{
	struct au_br_hfsnotify *hfsn = br->br_hfsn;

	if (!hfsn)
		return;

	lockdep_off();
	fsnotify_put_group(hfsn->hfsn_group);
	lockdep_on();
}
/*
 * Set up fsnotify support for branch @br.
 *
 * br->br_hfsn is reset to NULL first.  When the branch permission @perm is
 * not notifiable (au_br_hnotifyable()), nothing else is done and 0 is
 * returned.  Otherwise an au_br_hfsnotify and an fsnotify group using
 * au_hfsn_ops are allocated and linked to each other and to the branch.
 *
 * Returns 0 on success, -ENOMEM when the au_br_hfsnotify cannot be
 * allocated, or the error reported by fsnotify_alloc_group().
 */
static int au_hfsn_init_br(struct au_branch *br, int perm)
{
	struct au_br_hfsnotify *hfsn;
	struct fsnotify_group *grp;
	int err;

	br->br_hfsn = NULL;
	if (!au_br_hnotifyable(perm))
		return 0;

	hfsn = kmalloc(sizeof(*hfsn), GFP_NOFS);
	if (unlikely(!hfsn))
		return -ENOMEM;

	grp = fsnotify_alloc_group(&au_hfsn_ops);
	if (IS_ERR(grp)) {
		err = PTR_ERR(grp);
		pr_err("fsnotify_alloc_group() failed, %d\n", err);
		au_kfree_try_rcu(hfsn);
		return err;
	}

	grp->private = hfsn;
	hfsn->hfsn_group = grp;
	br->br_hfsn = hfsn;
	return 0;
}
/*
 * Make sure branch @br has its fsnotify data: when br->br_hfsn is still
 * NULL, au_hfsn_init_br() is called with @perm and its result returned;
 * otherwise nothing is done and 0 is returned.  @udba is unused.
 */
static int au_hfsn_reset_br(unsigned int udba, struct au_branch *br, int perm)
{
	if (br->br_hfsn)
		return 0;

	return au_hfsn_init_br(br, perm);
}
/* ---------------------------------------------------------------------- */
/*
 * Block until au_hfsn_ifree reaches zero, i.e. until every mark passed to
 * au_hfsn_free() has been released by au_hfsn_free_mark().
 */
static void au_hfsn_fin(void)
{
AuDbg("au_hfsn_ifree %lld\n", (long long)atomic64_read(&au_hfsn_ifree));
wait_event(au_hfsn_wq, !atomic64_read(&au_hfsn_ifree));
}
/*
 * The fsnotify-based implementation of the au_hnotify operations table,
 * which the generic hnotify code calls through au_hnotify_op.
 */
const struct au_hnotify_op au_hnotify_op = {
.ctl = au_hfsn_ctl,
.alloc = au_hfsn_alloc,
.free = au_hfsn_free,
.fin = au_hfsn_fin,
.reset_br = au_hfsn_reset_br,
.fin_br = au_hfsn_fin_br,
.init_br = au_hfsn_init_br
};

60
fs/aufs/hfsplus.c Normal file
View File

@ -0,0 +1,60 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2010-2020 Junjiro R. Okajima
*
* This program, aufs is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* special support for filesystems which acquire an inode mutex
* at the final close of a file, e.g. hfsplus.
*
* This trick is very simple and stupid: open the file once more before the
* really necessary open, to tell hfsplus that this is not the final close.
* The caller should call au_h_open_pre() after acquiring the inode mutex,
* and au_h_open_post() after releasing it.
*/
#include "aufs.h"
/*
 * Open the lower file of @dentry on branch @bindex ahead of time, but only
 * when the lower dentry is a regular file on hfsplus.
 *
 * Returns NULL when no extra open is needed; otherwise the result of
 * au_h_open() (read-only, O_NOATIME | O_LARGEFILE), which is passed back
 * unchecked.  The result is meant to be handed to au_h_open_post().
 */
struct file *au_h_open_pre(struct dentry *dentry, aufs_bindex_t bindex,
			   int force_wr)
{
	struct dentry *h_dentry = au_h_dptr(dentry, bindex);

	AuDebugOn(!h_dentry);
	AuDebugOn(d_is_negative(h_dentry));

	if (!au_test_hfsplus(h_dentry->d_sb) || !d_is_reg(h_dentry))
		return NULL;

	return au_h_open(dentry, bindex, O_RDONLY | O_NOATIME | O_LARGEFILE,
			 /*file*/NULL, force_wr);
}
/*
 * Undo au_h_open_pre(): when @h_file is non-NULL, release it and decrement
 * the open-file counter of the branch at @bindex.  A NULL @h_file is a
 * no-op.
 */
void au_h_open_post(struct dentry *dentry, aufs_bindex_t bindex,
		    struct file *h_file)
{
	struct au_branch *br;

	if (!h_file)
		return;

	fput(h_file);
	br = au_sbr(dentry->d_sb, bindex);
	au_lcnt_dec(&br->br_nfiles);
}

715
fs/aufs/hnotify.c Normal file
View File

@ -0,0 +1,715 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2005-2020 Junjiro R. Okajima
*
* This program, aufs is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* abstraction to notify the direct changes on lower directories
*/
/* #include <linux/iversion.h> */
#include "aufs.h"
/*
 * Allocate a notify object for the lower inode @hinode on behalf of the
 * aufs inode @inode and register it through au_hnotify_op.alloc().
 *
 * When registration fails the object is freed again and hinode->hi_notify
 * is reset to NULL; -EEXIST from the backend is then reported as success.
 * Returns 0 on success, -ENOMEM when the object cannot be allocated, or
 * the backend's error.
 */
int au_hn_alloc(struct au_hinode *hinode, struct inode *inode)
{
	int err = -ENOMEM;
	struct au_hnotify *hn = au_cache_alloc_hnotify();

	if (!hn)
		goto out;

	hn->hn_aufs_inode = inode;
	hinode->hi_notify = hn;
	err = au_hnotify_op.alloc(hinode);
	AuTraceErr(err);
	if (unlikely(err)) {
		hinode->hi_notify = NULL;
		au_cache_free_hnotify(hn);
		/*
		 * The upper dir was removed by udba, but the same named
		 * dir left. In this case, aufs assigns a new inode
		 * number and set the monitor again.
		 * For the lower dir, the old monitor is still left.
		 */
		if (err == -EEXIST)
			err = 0;
	}

out:
	AuTraceErr(err);
	return err;
}
/*
 * Detach the notify object from @hinode, if there is one, and hand it to
 * au_hnotify_op.free().  The object is freed here only when the backend
 * returns non-zero; a zero return means the backend releases it itself.
 */
void au_hn_free(struct au_hinode *hinode)
{
	struct au_hnotify *hn = hinode->hi_notify;

	if (!hn)
		return;

	hinode->hi_notify = NULL;
	if (au_hnotify_op.free(hinode, hn))
		au_cache_free_hnotify(hn);
}
/* ---------------------------------------------------------------------- */
/*
 * Forward an enable/disable request (@do_set) to the notify backend.
 * Nothing happens when @hinode has no notify object.
 */
void au_hn_ctl(struct au_hinode *hinode, int do_set)
{
	if (!hinode->hi_notify)
		return;

	au_hnotify_op.ctl(hinode, do_set);
}
/*
 * Re-install every lower inode of @inode, branch by branch, so that each
 * one is set up again with @flags (AuHi_XINO is always masked out).
 *
 * For each populated branch the lower inode is cleared with
 * au_set_h_iptr(NULL) and immediately set again.  Extra references on the
 * lower inode and on the branch's whiteout dentry (au_hi_wh()) are held
 * around the swap and dropped afterwards.
 */
void au_hn_reset(struct inode *inode, unsigned int flags)
{
aufs_bindex_t bindex, bbot;
struct inode *hi;
struct dentry *iwhdentry;
bbot = au_ibbot(inode);
for (bindex = au_ibtop(inode); bindex <= bbot; bindex++) {
hi = au_h_iptr(inode, bindex);
if (!hi)
continue;
/* inode_lock_nested(hi, AuLsc_I_CHILD); */
iwhdentry = au_hi_wh(inode, bindex);
if (iwhdentry)
dget(iwhdentry);
/* keep hi alive while the slot is cleared; released by iput() below */
au_igrab(hi);
au_set_h_iptr(inode, bindex, NULL, 0);
/* this second reference is the one handed to the slot */
au_set_h_iptr(inode, bindex, au_igrab(hi),
flags & ~AuHi_XINO);
iput(hi);
dput(iwhdentry);
/* inode_unlock(hi); */
}
}
/* ---------------------------------------------------------------------- */
/*
 * Reset the xino entries of @inode.
 *
 * Nothing is done for the aufs root inode (a warning is printed) or when
 * @h_inode is not one of the lower inodes of @inode.  Otherwise, for every
 * populated branch of @inode, inode number 0 is written to the xino entry
 * of that branch's lower inode.
 *
 * The return value is the result of the LAST au_xino_write() only; earlier
 * failures are overwritten.  The callers in this file ignore it.
 */
static int hn_xino(struct inode *inode, struct inode *h_inode)
{
int err;
aufs_bindex_t bindex, bbot, bfound, btop;
struct inode *h_i;
err = 0;
if (unlikely(inode->i_ino == AUFS_ROOT_INO)) {
pr_warn("branch root dir was changed\n");
goto out;
}
bfound = -1;
bbot = au_ibbot(inode);
btop = au_ibtop(inode);
#if 0 /* reserved for future use */
if (bindex == bbot) {
/* keep this ino in rename case */
goto out;
}
#endif
/* is h_inode one of our lower inodes at all? */
for (bindex = btop; bindex <= bbot; bindex++)
if (au_h_iptr(inode, bindex) == h_inode) {
bfound = bindex;
break;
}
if (bfound < 0)
goto out;
for (bindex = btop; bindex <= bbot; bindex++) {
h_i = au_h_iptr(inode, bindex);
if (!h_i)
continue;
err = au_xino_write(inode->i_sb, bindex, h_i->i_ino, /*ino*/0);
/* ignore this error */
/* bad action? */
}
/* children inode number will be broken */
out:
AuTraceErr(err);
return err;
}
/*
 * Make the generation obsolete for every dentry collected by
 * au_dcsub_pages() for @dentry: au_digen_dec() is applied to each
 * non-root dentry and au_iigen_dec() to its inode when it has one.
 *
 * Returns 0 on success or the error from au_dpages_init() or
 * au_dcsub_pages().
 */
static int hn_gen_tree(struct dentry *dentry)
{
	struct au_dcsub_pages dpages;
	int err, pg, idx;

	err = au_dpages_init(&dpages, GFP_NOFS);
	if (unlikely(err))
		return err;

	err = au_dcsub_pages(&dpages, dentry, NULL, NULL);
	if (unlikely(err))
		goto out_free;

	for (pg = 0; pg < dpages.ndpage; pg++) {
		struct au_dpage *page = &dpages.dpages[pg];
		struct dentry **list = page->dentries;
		const int nr = page->ndentry;

		for (idx = 0; idx < nr; idx++) {
			struct dentry *d = list[idx];

			if (IS_ROOT(d))
				continue;

			au_digen_dec(d);
			if (d_really_is_positive(d))
				/* todo: reset children xino?
				   cached children only? */
				au_iigen_dec(d_inode(d));
		}
	}

out_free:
	au_dpages_free(&dpages);
	return err;
}
/*
 * Make the generation of the aufs @inode, and of its dentry alias named
 * @name (length @nlen), obsolete.
 *
 * Non-directory (@isdir == 0): the inode generation is decremented, then
 * the aliases of @inode are scanned for the first one whose name is exactly
 * @name; its dentry generation is decremented as well.
 * Directory: FAILED_REFRESH_DIR is set on the superblock info and, when an
 * alias of @inode is named exactly @name, the whole subtree below it is
 * processed by hn_gen_tree().  Without any alias only the inode generation
 * is decremented.
 *
 * An alias matches only when BOTH the length and the bytes are equal, the
 * same rule the directory branch and lookup_wlock_by_name() use.  The
 * length is tested first so that memcmp() never compares @nlen bytes
 * against a name of a different length.
 *
 * return 0 if processed (also for the aufs root inode, which is only
 * warned about), 1 when no matching alias was handled, or the error from
 * hn_gen_tree().
 */
static int hn_gen_by_inode(char *name, unsigned int nlen, struct inode *inode,
			   const unsigned int isdir)
{
	int err;
	struct dentry *d;
	struct qstr *dname;

	err = 1;
	if (unlikely(inode->i_ino == AUFS_ROOT_INO)) {
		pr_warn("branch root dir was changed\n");
		err = 0;
		goto out;
	}

	if (!isdir) {
		AuDebugOn(!name);
		au_iigen_dec(inode);
		spin_lock(&inode->i_lock);
		hlist_for_each_entry(d, &inode->i_dentry, d_u.d_alias) {
			spin_lock(&d->d_lock);
			dname = &d->d_name;
			/* skip aliases whose name is not exactly @name */
			if (dname->len != nlen
			    || memcmp(dname->name, name, nlen)) {
				spin_unlock(&d->d_lock);
				continue;
			}
			err = 0;
			au_digen_dec(d);
			spin_unlock(&d->d_lock);
			break;
		}
		spin_unlock(&inode->i_lock);
	} else {
		au_fset_si(au_sbi(inode->i_sb), FAILED_REFRESH_DIR);
		d = d_find_any_alias(inode);
		if (!d) {
			au_iigen_dec(inode);
			goto out;
		}

		spin_lock(&d->d_lock);
		dname = &d->d_name;
		if (dname->len == nlen && !memcmp(dname->name, name, nlen)) {
			/* hn_gen_tree() is called without d_lock held */
			spin_unlock(&d->d_lock);
			err = hn_gen_tree(d);
			spin_lock(&d->d_lock);
		}
		spin_unlock(&d->d_lock);
		dput(d);
	}

out:
	AuTraceErr(err);
	return err;
}
/*
 * Make the generation of @dentry obsolete.
 *
 * Non-directory: the dentry generation and, for a positive dentry, the
 * inode generation are decremented.
 * Directory: FAILED_REFRESH_DIR is set on the superblock info and, for a
 * positive dentry, the subtree is processed by hn_gen_tree().
 * A root dentry is only warned about.
 *
 * Returns 0, or the error from hn_gen_tree().
 */
static int hn_gen_by_name(struct dentry *dentry, const unsigned int isdir)
{
	int err = 0;

	if (IS_ROOT(dentry)) {
		pr_warn("branch root dir was changed\n");
		return 0;
	}

	if (isdir) {
		au_fset_si(au_sbi(dentry->d_sb), FAILED_REFRESH_DIR);
		if (d_really_is_positive(dentry))
			err = hn_gen_tree(dentry);
	} else {
		au_digen_dec(dentry);
		if (d_really_is_positive(dentry))
			au_iigen_dec(d_inode(dentry));
	}

	AuTraceErr(err);
	return err;
}
/* ---------------------------------------------------------------------- */
/*
 * hnotify job flags: bits selecting what hn_job() does for one target.
 *   XINO0    - reset the xino entries (hn_xino())
 *   GEN      - make the dentry/inode generation obsolete
 *   DIRENT   - make the cached directory entries obsolete
 *   ISDIR    - the target is a directory
 *   TRYXINO0 - reset the xino entries only if the lower inode has no link
 *   MNTPNT   - warn when the target dentry is a mount point
 */
#define AuHnJob_XINO0 1
#define AuHnJob_GEN (1 << 1)
#define AuHnJob_DIRENT (1 << 2)
#define AuHnJob_ISDIR (1 << 3)
#define AuHnJob_TRYXINO0 (1 << 4)
#define AuHnJob_MNTPNT (1 << 5)
/* test, set and clear helpers; @name is the flag name without the AuHnJob_ prefix */
#define au_ftest_hnjob(flags, name) ((flags) & AuHnJob_##name)
#define au_fset_hnjob(flags, name) \
do { (flags) |= AuHnJob_##name; } while (0)
#define au_fclr_hnjob(flags, name) \
do { (flags) &= ~AuHnJob_##name; } while (0)
/* indices into the per-target job-flag arrays (child entry vs. parent dir) */
enum {
AuHn_CHILD,
AuHn_PARENT,
AuHnLast
};
/*
 * Arguments of one deferred notification, consumed and freed by au_hn_bh().
 * The inode pointers carry references which au_hn_bh() drops with iput().
 */
struct au_hnotify_args {
/* lower dir the event was raised on, its aufs dir, lower child (may be NULL) */
struct inode *h_dir, *dir, *h_child_inode;
/* fsnotify event mask */
u32 mask;
/* job flags, indexed by AuHn_CHILD / AuHn_PARENT */
unsigned int flags[AuHnLast];
/* length of h_child_name; 0 when there is no name */
unsigned int h_child_nlen;
/* child name, stored inline after the struct */
char h_child_name[];
};
/* Arguments for one hn_job() run on a single target (the child or the parent). */
struct hn_job_args {
/* AuHnJob_* bits */
unsigned int flags;
/* aufs inode and lower inode of the target, aufs dir and lower dir */
struct inode *inode, *h_inode, *dir, *h_dir;
/* aufs dentry of the target, may be NULL */
struct dentry *dentry;
/* name of the target on the branch and its length */
char *h_name;
int h_nlen;
};
/*
 * Execute the jobs selected by a->flags for one target.
 * Each step is best-effort: errors from the helpers are ignored and the
 * function always returns 0.
 */
static int hn_job(struct hn_job_args *a)
{
const unsigned int isdir = au_ftest_hnjob(a->flags, ISDIR);
int e;
/* reset xino */
if (au_ftest_hnjob(a->flags, XINO0) && a->inode)
hn_xino(a->inode, a->h_inode); /* ignore this error */
/*
 * conditional xino reset: only when the lower inode has no link left and
 * is not marked I_LINKABLE
 */
if (au_ftest_hnjob(a->flags, TRYXINO0)
&& a->inode
&& a->h_inode) {
inode_lock_shared_nested(a->h_inode, AuLsc_I_CHILD);
if (!a->h_inode->i_nlink
&& !(a->h_inode->i_state & I_LINKABLE))
hn_xino(a->inode, a->h_inode); /* ignore this error */
inode_unlock_shared(a->h_inode);
}
/* make the generation obsolete */
if (au_ftest_hnjob(a->flags, GEN)) {
/* non-zero 'e' means "not processed via the inode": fall back to the dentry */
e = -1;
if (a->inode)
e = hn_gen_by_inode(a->h_name, a->h_nlen, a->inode,
isdir);
if (e && a->dentry)
hn_gen_by_name(a->dentry, isdir);
/* ignore this error */
}
/* make dir entries obsolete */
if (au_ftest_hnjob(a->flags, DIRENT) && a->inode) {
struct au_vdir *vdir;
vdir = au_ivdir(a->inode);
if (vdir)
vdir->vd_jiffy = 0;
/* IMustLock(a->inode); */
/* inode_inc_iversion(a->inode); */
}
/* can do nothing but warn */
if (au_ftest_hnjob(a->flags, MNTPNT)
&& a->dentry
&& d_mountpoint(a->dentry))
pr_warn("mount-point %pd is removed or renamed\n", a->dentry);
return 0;
}
/* ---------------------------------------------------------------------- */
/*
 * Find the cached child of the aufs directory @dir whose name is exactly
 * @name (length @nlen).
 *
 * The children of any alias of @dir are scanned under the parent's d_lock.
 * Every name-matching child that has aufs dentry info gets its dentry
 * generation decremented, whether or not it is returned.  The first such
 * child with a positive au_dcount() is returned with a reference taken
 * (dget_dlock()) and locked by di_write_lock_child().
 *
 * Returns NULL when @dir has no alias or no child qualifies.
 */
static struct dentry *lookup_wlock_by_name(char *name, unsigned int nlen,
struct inode *dir)
{
struct dentry *dentry, *d, *parent;
struct qstr *dname;
parent = d_find_any_alias(dir);
if (!parent)
return NULL;
dentry = NULL;
spin_lock(&parent->d_lock);
list_for_each_entry(d, &parent->d_subdirs, d_child) {
/* AuDbg("%pd\n", d); */
spin_lock_nested(&d->d_lock, DENTRY_D_LOCK_NESTED);
dname = &d->d_name;
if (dname->len != nlen || memcmp(dname->name, name, nlen))
goto cont_unlock;
if (au_di(d))
au_digen_dec(d);
else
goto cont_unlock;
if (au_dcount(d) > 0) {
dentry = dget_dlock(d);
spin_unlock(&d->d_lock);
break;
}
cont_unlock:
spin_unlock(&d->d_lock);
}
spin_unlock(&parent->d_lock);
dput(parent);
/* take the aufs dentry lock only after the spinlocks are dropped */
if (dentry)
di_write_lock_child(dentry);
return dentry;
}
/*
 * Find the aufs inode that corresponds to the lower inode number @h_ino on
 * branch @bindex, using the xino table and ilookup().
 *
 * On success the inode is returned with the reference taken by ilookup()
 * and locked by ii_write_lock_child().  NULL is returned when the xino
 * lookup fails or yields 0, when the inode is not found by ilookup(), or
 * when it turns out to be the aufs root inode (a warning is printed and
 * the reference dropped).
 */
static struct inode *lookup_wlock_by_ino(struct super_block *sb,
					 aufs_bindex_t bindex, ino_t h_ino)
{
	struct inode *inode;
	ino_t ino;
	int err;

	err = au_xino_read(sb, bindex, h_ino, &ino);
	if (err || !ino)
		return NULL;

	inode = ilookup(sb, ino);
	if (!inode)
		return NULL;

	if (unlikely(inode->i_ino == AUFS_ROOT_INO)) {
		pr_warn("wrong root branch\n");
		iput(inode);
		return NULL;
	}

	ii_write_lock_child(inode);
	return inode;
}
/*
 * Deferred half of a lower-directory notification; @_args is a
 * struct au_hnotify_args.
 *
 * Under the superblock write lock it
 *  1. finds the branch on which a->h_dir is a lower inode of a->dir,
 *  2. looks up the affected child, by name and/or by lower inode number,
 *  3. runs hn_job() for the child and then for the parent directory.
 * On every path the three inode references held in @_args are dropped,
 * au_nwt_done() is called and @_args is freed.
 *
 * NOTE(review): 'err' receives the result of both hn_job() calls but is
 * never read.
 */
static void au_hn_bh(void *_args)
{
struct au_hnotify_args *a = _args;
struct super_block *sb;
aufs_bindex_t bindex, bbot, bfound;
unsigned char xino, try_iput;
int err;
struct inode *inode;
ino_t h_ino;
struct hn_job_args args;
struct dentry *dentry;
struct au_sbinfo *sbinfo;
AuDebugOn(!_args);
AuDebugOn(!a->h_dir);
AuDebugOn(!a->dir);
AuDebugOn(!a->mask);
AuDbg("mask 0x%x, i%lu, hi%lu, hci%lu\n",
a->mask, a->dir->i_ino, a->h_dir->i_ino,
a->h_child_inode ? a->h_child_inode->i_ino : 0);
inode = NULL;
dentry = NULL;
/*
* do not lock a->dir->i_mutex here
* because of d_revalidate() may cause a deadlock.
*/
sb = a->dir->i_sb;
AuDebugOn(!sb);
sbinfo = au_sbi(sb);
AuDebugOn(!sbinfo);
si_write_lock(sb, AuLock_NOPLMW);
/* only warn: a direct rename on a branch with the DIRREN option set */
if (au_opt_test(sbinfo->si_mntflags, DIRREN))
switch (a->mask & FS_EVENTS_POSS_ON_CHILD) {
case FS_MOVED_FROM:
case FS_MOVED_TO:
AuWarn1("DIRREN with UDBA may not work correctly "
"for the direct rename(2)\n");
}
/* on which branch is h_dir a lower inode of dir? */
ii_read_lock_parent(a->dir);
bfound = -1;
bbot = au_ibbot(a->dir);
for (bindex = au_ibtop(a->dir); bindex <= bbot; bindex++)
if (au_h_iptr(a->dir, bindex) == a->h_dir) {
bfound = bindex;
break;
}
ii_read_unlock(a->dir);
if (unlikely(bfound < 0))
goto out;
xino = !!au_opt_test(au_mntflags(sb), XINO);
h_ino = 0;
if (a->h_child_inode)
h_ino = a->h_child_inode->i_ino;
/* the child dentry is needed only by the GEN and MNTPNT jobs */
if (a->h_child_nlen
&& (au_ftest_hnjob(a->flags[AuHn_CHILD], GEN)
|| au_ftest_hnjob(a->flags[AuHn_CHILD], MNTPNT)))
dentry = lookup_wlock_by_name(a->h_child_name, a->h_child_nlen,
a->dir);
try_iput = 0;
if (dentry && d_really_is_positive(dentry))
inode = d_inode(dentry);
/* no inode via the dentry: try the xino table when the XINO option is set */
if (xino && !inode && h_ino
&& (au_ftest_hnjob(a->flags[AuHn_CHILD], XINO0)
|| au_ftest_hnjob(a->flags[AuHn_CHILD], TRYXINO0)
|| au_ftest_hnjob(a->flags[AuHn_CHILD], GEN))) {
inode = lookup_wlock_by_ino(sb, bfound, h_ino);
/* this inode is locked and referenced by us, release it below */
try_iput = 1;
}
/* jobs for the child */
args.flags = a->flags[AuHn_CHILD];
args.dentry = dentry;
args.inode = inode;
args.h_inode = a->h_child_inode;
args.dir = a->dir;
args.h_dir = a->h_dir;
args.h_name = a->h_child_name;
args.h_nlen = a->h_child_nlen;
err = hn_job(&args);
if (dentry) {
if (au_di(dentry))
di_write_unlock(dentry);
dput(dentry);
}
if (inode && try_iput) {
ii_write_unlock(inode);
iput(inode);
}
/* jobs for the parent directory */
ii_write_lock_parent(a->dir);
args.flags = a->flags[AuHn_PARENT];
args.dentry = NULL;
args.inode = a->dir;
args.h_inode = a->h_dir;
args.dir = NULL;
args.h_dir = NULL;
args.h_name = NULL;
args.h_nlen = 0;
err = hn_job(&args);
ii_write_unlock(a->dir);
out:
/* drop the references carried by the arguments, then free them */
iput(a->h_child_inode);
iput(a->h_dir);
iput(a->dir);
si_write_unlock(sb);
au_nwt_done(&sbinfo->si_nowait);
au_kfree_rcu(a);
}
/* ---------------------------------------------------------------------- */
/*
 * entry point for a notification from a branch fs about an entry under @h_dir.
 * pack everything the handler needs (references to the aufs dir, @h_dir and
 * the child inode, the job flags for the parent and the child, and a copy of
 * the child name) into a freshly allocated au_hnotify_args and pass it with
 * au_hn_bh to au_wkq_nowait().
 * a child name which begins with AUFS_WH_PFX is handled as a whiteout: the
 * prefix is stripped and @h_child_inode is ignored.
 * returns zero when the work is queued or when the aufs dir inode cannot be
 * grabbed, -ENOMEM when the allocation fails, otherwise the error from
 * au_wkq_nowait().
 */
int au_hnotify(struct inode *h_dir, struct au_hnotify *hnotify, u32 mask,
const struct qstr *h_child_qstr, struct inode *h_child_inode)
{
int err, len;
unsigned int flags[AuHnLast], f;
unsigned char isdir, isroot, wh;
struct inode *dir;
struct au_hnotify_args *args;
char *p, *h_child_name;
err = 0;
AuDebugOn(!hnotify || !hnotify->hn_aufs_inode);
/* hold the aufs dir inode. when it cannot be grabbed, silently succeed */
dir = igrab(hnotify->hn_aufs_inode);
if (!dir)
goto out;
/* NOTE(review): isroot is assigned here but never read in this function */
isroot = (dir->i_ino == AUFS_ROOT_INO);
wh = 0;
h_child_name = (void *)h_child_qstr->name;
len = h_child_qstr->len;
if (h_child_name) {
/* a whiteout: continue with the name without the whiteout prefix */
if (len > AUFS_WH_PFX_LEN
&& !memcmp(h_child_name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) {
h_child_name += AUFS_WH_PFX_LEN;
len -= AUFS_WH_PFX_LEN;
wh = 1;
}
}
isdir = 0;
if (h_child_inode)
isdir = !!S_ISDIR(h_child_inode->i_mode);
/* the parent is always a dir. the child gets ISDIR only when it is one */
flags[AuHn_PARENT] = AuHnJob_ISDIR;
flags[AuHn_CHILD] = 0;
if (isdir)
flags[AuHn_CHILD] = AuHnJob_ISDIR;
au_fset_hnjob(flags[AuHn_PARENT], DIRENT);
au_fset_hnjob(flags[AuHn_CHILD], GEN);
/* add the jobs which depend on the kind of the event */
switch (mask & ALL_FSNOTIFY_DIRENT_EVENTS) {
case FS_MOVED_FROM:
case FS_MOVED_TO:
au_fset_hnjob(flags[AuHn_CHILD], XINO0);
au_fset_hnjob(flags[AuHn_CHILD], MNTPNT);
/*FALLTHROUGH*/
case FS_CREATE:
AuDebugOn(!h_child_name);
break;
case FS_DELETE:
/*
 * aufs will never be able to get this child inode.
 * revalidation should be done in d_revalidate()
 * by checking i_nlink, i_generation or d_unhashed().
 */
AuDebugOn(!h_child_name);
au_fset_hnjob(flags[AuHn_CHILD], TRYXINO0);
au_fset_hnjob(flags[AuHn_CHILD], MNTPNT);
break;
default:
AuDebugOn(1);
}
/* the inode behind a whiteout is not passed to the handler */
if (wh)
h_child_inode = NULL;
err = -ENOMEM;
/*
 * the inode references and args itself are released by the work queued
 * below (au_hn_bh), or by the error path at the end of this function.
 * the child name is stored right after the struct, hence len + 1.
 */
args = kmalloc(sizeof(*args) + len + 1, GFP_NOFS);
if (unlikely(!args)) {
AuErr1("no memory\n");
iput(dir);
goto out;
}
args->flags[AuHn_PARENT] = flags[AuHn_PARENT];
args->flags[AuHn_CHILD] = flags[AuHn_CHILD];
args->mask = mask;
/* the reference taken by igrab() above moves to args */
args->dir = dir;
args->h_dir = igrab(h_dir);
if (h_child_inode)
h_child_inode = igrab(h_child_inode); /* can be NULL */
args->h_child_inode = h_child_inode;
args->h_child_nlen = len;
if (len) {
/* copy the name into the trailing bytes and terminate it */
p = (void *)args;
p += sizeof(*args);
memcpy(p, h_child_name, len);
p[len] = 0;
}
/* NFS fires the event for silly-renamed one from kworker */
f = 0;
if (!dir->i_nlink
|| (au_test_nfs(h_dir->i_sb) && (mask & FS_DELETE)))
f = AuWkq_NEST;
err = au_wkq_nowait(au_hn_bh, args, dir->i_sb, f);
if (unlikely(err)) {
/* the work was not queued, so release everything here */
pr_err("wkq %d\n", err);
iput(args->h_child_inode);
iput(args->h_dir);
iput(args->dir);
au_kfree_rcu(args);
}
out:
return err;
}
/* ---------------------------------------------------------------------- */
/*
 * pass the reset request for @br to the hnotify backend.
 * returns the result of the backend's reset_br hook, or zero when the backend
 * does not provide one.
 */
int au_hnotify_reset_br(unsigned int udba, struct au_branch *br, int perm)
{
	AuDebugOn(!(udba & AuOptMask_UDBA));

	if (!au_hnotify_op.reset_br)
		return 0;

	return au_hnotify_op.reset_br(udba, br, perm);
}
/*
 * let the hnotify backend initialize its part of @br.
 * returns the result of the backend's init_br hook, or zero when the backend
 * does not provide one.
 */
int au_hnotify_init_br(struct au_branch *br, int perm)
{
	if (!au_hnotify_op.init_br)
		return 0;

	return au_hnotify_op.init_br(br, perm);
}
/* let the hnotify backend finalize its part of @br, when it has such hook */
void au_hnotify_fin_br(struct au_branch *br)
{
	if (!au_hnotify_op.fin_br)
		return;

	au_hnotify_op.fin_br(br);
}
/*
 * destroy the slab cache for hnotify objects and clear its slot in au_cache[],
 * so that a later test of the slot sees that the cache is gone.
 */
static void au_hn_destroy_cache(void)
{
kmem_cache_destroy(au_cache[AuCache_HNOTIFY]);
au_cache[AuCache_HNOTIFY] = NULL;
}
/*
 * create the slab cache for hnotify objects and initialize the backend.
 * returns -ENOMEM when the cache cannot be created. when the backend's init
 * hook fails, the cache is destroyed again and the hook's error is returned.
 */
int __init au_hnotify_init(void)
{
	int err;

	au_cache[AuCache_HNOTIFY] = AuCache(au_hnotify);
	if (!au_cache[AuCache_HNOTIFY]) {
		err = -ENOMEM;
		goto out;
	}

	/* the init hook is optional */
	err = au_hnotify_op.init ? au_hnotify_op.init() : 0;
	if (unlikely(err))
		au_hn_destroy_cache();

out:
	AuTraceErr(err);
	return err;
}
/*
 * finalize the hnotify backend (when it has a fin hook) and destroy the slab
 * cache for hnotify objects unless it is already gone.
 */
void au_hnotify_fin(void)
{
	if (au_hnotify_op.fin)
		au_hnotify_op.fin();

	/* cf. au_cache_fin() */
	if (!au_cache[AuCache_HNOTIFY])
		return;

	au_hn_destroy_cache();
}

1502
fs/aufs/i_op.c Normal file

File diff suppressed because it is too large Load Diff

936
fs/aufs/i_op_add.c Normal file
View File

@ -0,0 +1,936 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2005-2020 Junjiro R. Okajima
*
* This program, aufs is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* inode operations (add entry)
*/
#include <linux/iversion.h>
#include "aufs.h"
/*
 * final procedure of adding a new entry, except link(2).
 * remove whiteout, instantiate, copyup the parent dir's times and size
 * and update version.
 * if it failed, re-create the removed whiteout.
 *
 * @wh_dentry may be NULL, which means there is no whiteout to remove.
 * returns zero on success. on failure returns the error from the whiteout
 * removal or from au_new_inode(), or -EIO when the removed whiteout could not
 * be re-created.
 */
static int epilog(struct inode *dir, aufs_bindex_t bindex,
struct dentry *wh_dentry, struct dentry *dentry)
{
int err, rerr;
aufs_bindex_t bwh;
struct path h_path;
struct super_block *sb;
struct inode *inode, *h_dir;
struct dentry *wh;
bwh = -1;
sb = dir->i_sb;
if (wh_dentry) {
h_dir = d_inode(wh_dentry->d_parent); /* dir inode is locked */
IMustLock(h_dir);
AuDebugOn(au_h_iptr(dir, bindex) != h_dir);
/* remember the branch index of the whiteout for the revert below */
bwh = au_dbwh(dentry);
h_path.dentry = wh_dentry;
h_path.mnt = au_sbr_mnt(sb, bindex);
err = au_wh_unlink_dentry(au_h_iptr(dir, bindex), &h_path,
dentry);
if (unlikely(err))
goto out;
}
inode = au_new_inode(dentry, /*must_new*/1);
if (!IS_ERR(inode)) {
d_instantiate(dentry, inode);
dir = d_inode(dentry->d_parent); /* dir inode is locked */
IMustLock(dir);
au_dir_ts(dir, bindex);
inode_inc_iversion(dir);
au_fhsm_wrote(sb, bindex, /*force*/0);
return 0; /* success */
}
err = PTR_ERR(inode);
/* no whiteout was removed, so there is nothing to revert */
if (!wh_dentry)
goto out;
/* revert */
/* dir inode is locked */
wh = au_wh_create(dentry, bwh, wh_dentry->d_parent);
rerr = PTR_ERR(wh);
if (IS_ERR(wh)) {
AuIOErr("%pd reverting whiteout failed(%d, %d)\n",
dentry, err, rerr);
err = -EIO;
} else
dput(wh);
out:
return err;
}
/*
 * test whether a new entry may be added as @dentry.
 * returns -EEXIST when @dentry is already positive (this has priority),
 * -ENOENT when it is unhashed, otherwise zero.
 */
static int au_d_may_add(struct dentry *dentry)
{
	if (unlikely(d_really_is_positive(dentry)))
		return -EEXIST;
	if (unlikely(d_unhashed(dentry)))
		return -ENOENT;

	return 0;
}
/*
 * simple tests for the adding inode operations.
 * following the checks in vfs, plus the parent-child relationship.
 *
 * returns zero when the entry may be added on the branch @bindex, otherwise
 * -ENAMETOOLONG, -EEXIST, -EIO, -EISDIR or -ENOTDIR as decided below.
 */
int au_may_add(struct dentry *dentry, aufs_bindex_t bindex,
struct dentry *h_parent, int isdir)
{
int err;
umode_t h_mode;
struct dentry *h_dentry;
struct inode *h_inode;
err = -ENAMETOOLONG;
if (unlikely(dentry->d_name.len > AUFS_MAX_NAMELEN))
goto out;
h_dentry = au_h_dptr(dentry, bindex);
if (d_really_is_negative(dentry)) {
/* a new name: the entry must not exist on the branch either */
err = -EEXIST;
if (unlikely(d_is_positive(h_dentry)))
goto out;
} else {
/* rename(2) case */
/* the entry on the branch must exist and still be linked */
err = -EIO;
if (unlikely(d_is_negative(h_dentry)))
goto out;
h_inode = d_inode(h_dentry);
if (unlikely(!h_inode->i_nlink))
goto out;
/* the type on the branch must match what the caller expects */
h_mode = h_inode->i_mode;
if (!isdir) {
err = -EISDIR;
if (unlikely(S_ISDIR(h_mode)))
goto out;
} else if (unlikely(!S_ISDIR(h_mode))) {
err = -ENOTDIR;
goto out;
}
}
err = 0;
/* expected parent dir is locked */
if (unlikely(h_parent != h_dentry->d_parent))
err = -EIO;
out:
AuTraceErr(err);
return err;
}
/*
 * initial procedure of adding a new entry.
 * prepare writable branch and the parent dir, lock it,
 * and lookup whiteout for the new entry.
 *
 * return value:
 * ERR_PTR: error. the pin is not held (never taken or already released).
 * NULL: success and no whiteout lookup was necessary. the pin is held.
 * others: the result of au_wh_lkup(). the pin is held.
 * @dt may be NULL. otherwise the parent dir's times are stored in it.
 */
static struct dentry*
lock_hdir_lkup_wh(struct dentry *dentry, struct au_dtime *dt,
struct dentry *src_dentry, struct au_pin *pin,
struct au_wr_dir_args *wr_dir_args)
{
struct dentry *wh_dentry, *h_parent;
struct super_block *sb;
struct au_branch *br;
int err;
unsigned int udba;
aufs_bindex_t bcpup;
AuDbg("%pd\n", dentry);
/* a non-negative return value is the branch index to operate on */
err = au_wr_dir(dentry, src_dentry, wr_dir_args);
bcpup = err;
wh_dentry = ERR_PTR(err);
if (unlikely(err < 0))
goto out;
sb = dentry->d_sb;
udba = au_opt_udba(sb);
err = au_pin(pin, dentry, bcpup, udba,
AuPin_DI_LOCKED | AuPin_MNT_WRITE);
wh_dentry = ERR_PTR(err);
if (unlikely(err))
goto out;
h_parent = au_pinned_h_parent(pin);
/*
 * the full test runs only when udba is enabled and the target branch is
 * the top branch of dentry. otherwise only the name length is tested.
 */
if (udba != AuOpt_UDBA_NONE
&& au_dbtop(dentry) == bcpup)
err = au_may_add(dentry, bcpup, h_parent,
au_ftest_wrdir(wr_dir_args->flags, ISDIR));
else if (unlikely(dentry->d_name.len > AUFS_MAX_NAMELEN))
err = -ENAMETOOLONG;
wh_dentry = ERR_PTR(err);
if (unlikely(err))
goto out_unpin;
br = au_sbr(sb, bcpup);
if (dt) {
struct path tmp = {
.dentry = h_parent,
.mnt = au_br_mnt(br)
};
au_dtime_store(dt, au_pinned_parent(pin), &tmp);
}
wh_dentry = NULL;
/* the whiteout is looked up only on the branch recorded as dbwh */
if (bcpup != au_dbwh(dentry))
goto out; /* success */
/*
 * ENAMETOOLONG here means that if we allowed creating such a name, then
 * it could not be removed in the future. So we don't allow such a name
 * here and we don't handle ENAMETOOLONG differently here.
 */
wh_dentry = au_wh_lkup(h_parent, &dentry->d_name, br);
out_unpin:
/* on the success path this label is passed with the pin kept */
if (IS_ERR(wh_dentry))
au_unpin(pin);
out:
return wh_dentry;
}
/* ---------------------------------------------------------------------- */
/* the kinds of entry add_simple() creates. stored in simple_arg.type */
enum { Mknod, Symlink, Creat };
/*
 * the argument for add_simple().
 * @type selects the member of the union which is valid.
 */
struct simple_arg {
int type;
union {
/* Creat */
struct {
umode_t mode;
bool want_excl;
/* set by au_aopen_or_create() only */
bool try_aopen;
/* dereferenced by add_simple() only when try_aopen is set */
struct vfsub_aopen_args *aopen;
} c;
/* Symlink */
struct {
const char *symname;
} s;
/* Mknod */
struct {
umode_t mode;
dev_t dev;
} m;
} u;
};
/*
 * the common body of mknod(2), symlink(2) and creat(2), including the
 * create-via-atomic_open case.
 * decide and pin the writable branch, create the entry on it according to
 * arg->type, and finish with epilog(). when epilog() fails, the entry created
 * here is removed again.
 * returns zero on success, otherwise a negative error. on any error after the
 * locks are taken, dentry is dropped.
 */
static int add_simple(struct inode *dir, struct dentry *dentry,
struct simple_arg *arg)
{
int err, rerr;
aufs_bindex_t btop;
unsigned char created;
const unsigned char try_aopen
= (arg->type == Creat && arg->u.c.try_aopen);
/* dereferenced only when try_aopen is set. cf. the uses below */
struct vfsub_aopen_args *aopen = arg->u.c.aopen;
struct dentry *wh_dentry, *parent;
struct inode *h_dir;
struct super_block *sb;
struct au_branch *br;
/* to reduce stack size */
struct {
struct au_dtime dt;
struct au_pin pin;
struct path h_path;
struct au_wr_dir_args wr_dir_args;
} *a;
AuDbg("%pd\n", dentry);
IMustLock(dir);
err = -ENOMEM;
a = kmalloc(sizeof(*a), GFP_NOFS);
if (unlikely(!a))
goto out;
a->wr_dir_args.force_btgt = -1;
a->wr_dir_args.flags = AuWrDir_ADD_ENTRY;
parent = dentry->d_parent; /* dir inode is locked */
/*
 * in the try_aopen case, neither the aufs lock nor the parent's dinfo
 * lock is taken or released here.
 * NOTE(review): presumably the caller holds them already -- confirm.
 */
if (!try_aopen) {
err = aufs_read_lock(dentry, AuLock_DW | AuLock_GEN);
if (unlikely(err))
goto out_free;
}
err = au_d_may_add(dentry);
if (unlikely(err))
goto out_unlock;
if (!try_aopen)
di_write_lock_parent(parent);
wh_dentry = lock_hdir_lkup_wh(dentry, &a->dt, /*src_dentry*/NULL,
&a->pin, &a->wr_dir_args);
err = PTR_ERR(wh_dentry);
if (IS_ERR(wh_dentry))
goto out_parent;
btop = au_dbtop(dentry);
sb = dentry->d_sb;
br = au_sbr(sb, btop);
a->h_path.dentry = au_h_dptr(dentry, btop);
a->h_path.mnt = au_br_mnt(br);
h_dir = au_pinned_h_dir(&a->pin);
/* 'created' tells the revert below whether there is anything to unlink */
switch (arg->type) {
case Creat:
if (!try_aopen || !h_dir->i_op->atomic_open) {
err = vfsub_create(h_dir, &a->h_path, arg->u.c.mode,
arg->u.c.want_excl);
created = !err;
if (!err && try_aopen)
aopen->file->f_mode |= FMODE_CREATED;
} else {
aopen->br = br;
err = vfsub_atomic_open(h_dir, a->h_path.dentry, aopen);
AuDbg("err %d\n", err);
AuDbgFile(aopen->file);
created = err >= 0
&& !!(aopen->file->f_mode & FMODE_CREATED);
}
break;
case Symlink:
err = vfsub_symlink(h_dir, &a->h_path, arg->u.s.symname);
created = !err;
break;
case Mknod:
err = vfsub_mknod(h_dir, &a->h_path, arg->u.m.mode,
arg->u.m.dev);
created = !err;
break;
default:
BUG();
}
if (unlikely(err < 0))
goto out_unpin;
err = epilog(dir, btop, wh_dentry, dentry);
if (!err)
goto out_unpin; /* success */
/* revert */
if (created /* && d_is_positive(a->h_path.dentry) */) {
/* no delegation since it is just created */
rerr = vfsub_unlink(h_dir, &a->h_path, /*delegated*/NULL,
/*force*/0);
if (rerr) {
AuIOErr("%pd revert failure(%d, %d)\n",
dentry, err, rerr);
err = -EIO;
}
au_dtime_revert(&a->dt);
}
if (try_aopen && h_dir->i_op->atomic_open
&& (aopen->file->f_mode & FMODE_OPENED))
/* aopen->file is still opened */
au_lcnt_dec(&aopen->br->br_nfiles);
out_unpin:
au_unpin(&a->pin);
dput(wh_dentry);
out_parent:
if (!try_aopen)
di_write_unlock(parent);
out_unlock:
if (unlikely(err)) {
au_update_dbtop(dentry);
d_drop(dentry);
}
if (!try_aopen)
aufs_read_unlock(dentry, AuLock_DW);
out_free:
au_kfree_rcu(a);
out:
return err;
}
/* mknod(2). fill simple_arg as Mknod and let add_simple() do the rest */
int aufs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
	       dev_t dev)
{
	struct simple_arg arg = {
		.type		= Mknod,
		.u.m.mode	= mode,
		.u.m.dev	= dev
	};

	return add_simple(dir, dentry, &arg);
}
int aufs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
{
struct simple_arg arg = {
.type = Symlink,
.u.s.symname = symname
};
return add_simple(dir, dentry, &arg);
}
/*
 * creat(2) without atomic_open. fill simple_arg as Creat, with try_aopen
 * cleared, and let add_simple() do the rest.
 */
int aufs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
		bool want_excl)
{
	struct simple_arg arg = {
		.type			= Creat,
		.u.c.mode		= mode,
		.u.c.want_excl		= want_excl,
		.u.c.try_aopen		= false,
		.u.c.aopen		= NULL
	};

	return add_simple(dir, dentry, &arg);
}
/*
 * create a regular file, via the branch's atomic_open when it has one.
 * the mode and the exclusive flag are taken from @aopen_args, which is also
 * handed to add_simple() with try_aopen set.
 */
int au_aopen_or_create(struct inode *dir, struct dentry *dentry,
		       struct vfsub_aopen_args *aopen_args)
{
	struct simple_arg arg = {
		.type			= Creat,
		.u.c.mode		= aopen_args->create_mode,
		.u.c.want_excl		= aopen_args->open_flag & O_EXCL,
		.u.c.try_aopen		= true,
		.u.c.aopen		= aopen_args
	};

	return add_simple(dir, dentry, &arg);
}
/*
 * O_TMPFILE support.
 * decide the writable branch for @dir, create the unnamed file there by
 * vfs_tmpfile(), and attach a new aufs inode for it to @dentry.
 * returns zero on success, otherwise a negative error. -EOPNOTSUPP is returned
 * when the dir on the branch has no ->tmpfile operation.
 */
int aufs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
{
int err;
aufs_bindex_t bindex;
struct super_block *sb;
struct dentry *parent, *h_parent, *h_dentry;
struct inode *h_dir, *inode;
struct vfsmount *h_mnt;
struct au_wr_dir_args wr_dir_args = {
.force_btgt = -1,
.flags = AuWrDir_TMPFILE
};
/* copy-up may happen */
inode_lock(dir);
sb = dir->i_sb;
err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
if (unlikely(err))
goto out;
/*
 * NOTE(review): presumably au_di_init() returns with dentry's dinfo
 * write-locked, since it is unlocked at out_parent -- confirm.
 */
err = au_di_init(dentry);
if (unlikely(err))
goto out_si;
err = -EBUSY;
parent = d_find_any_alias(dir);
AuDebugOn(!parent);
di_write_lock_parent(parent);
if (unlikely(d_inode(parent) != dir))
goto out_parent;
err = au_digen_test(parent, au_sigen(sb));
if (unlikely(err))
goto out_parent;
/* start from the parent's top branch, then let au_wr_dir() decide */
bindex = au_dbtop(parent);
au_set_dbtop(dentry, bindex);
au_set_dbbot(dentry, bindex);
err = au_wr_dir(dentry, /*src_dentry*/NULL, &wr_dir_args);
bindex = err;
if (unlikely(err < 0))
goto out_parent;
err = -EOPNOTSUPP;
h_dir = au_h_iptr(dir, bindex);
if (unlikely(!h_dir->i_op->tmpfile))
goto out_parent;
h_mnt = au_sbr_mnt(sb, bindex);
err = vfsub_mnt_want_write(h_mnt);
if (unlikely(err))
goto out_parent;
h_parent = au_h_dptr(parent, bindex);
h_dentry = vfs_tmpfile(h_parent, mode, /*open_flag*/0);
if (IS_ERR(h_dentry)) {
err = PTR_ERR(h_dentry);
goto out_mnt;
}
au_set_dbtop(dentry, bindex);
au_set_dbbot(dentry, bindex);
/* dentry gets its own reference. ours is put after this if-block */
au_set_h_dptr(dentry, bindex, dget(h_dentry));
inode = au_new_inode(dentry, /*must_new*/1);
if (IS_ERR(inode)) {
/* detach the lower dentry again and make dentry branch-less */
err = PTR_ERR(inode);
au_set_h_dptr(dentry, bindex, NULL);
au_set_dbtop(dentry, -1);
au_set_dbbot(dentry, -1);
} else {
if (!inode->i_nlink)
set_nlink(inode, 1);
d_tmpfile(dentry, inode);
au_di(dentry)->di_tmpfile = 1;
/* update without i_mutex */
if (au_ibtop(dir) == au_dbtop(dentry))
au_cpup_attr_timesizes(dir);
}
dput(h_dentry);
out_mnt:
vfsub_mnt_drop_write(h_mnt);
out_parent:
di_write_unlock(parent);
dput(parent);
di_write_unlock(dentry);
if (unlikely(err)) {
/* undo au_di_init() */
au_di_fin(dentry);
dentry->d_fsdata = NULL;
}
out_si:
si_read_unlock(sb);
out:
inode_unlock(dir);
return err;
}
/* ---------------------------------------------------------------------- */
/* the working set of aufs_link() and its helpers */
struct au_link_args {
/* the branch index where the new link is made, and where the source is */
aufs_bindex_t bdst, bsrc;
struct au_pin pin;
/* the entry on the branch bdst. set by aufs_link() */
struct path h_path;
/* the parents of the source and of the new entry */
struct dentry *src_parent, *parent;
};
/*
 * copy-up @src_dentry from the branch a->bsrc to a->bdst before link(2).
 * the parent dirs are copied-up first when necessary. a->pin is used and
 * released within this function, and a->src_parent is read-locked meanwhile.
 * returns zero on success, otherwise a negative error.
 */
static int au_cpup_before_link(struct dentry *src_dentry,
struct au_link_args *a)
{
int err;
struct dentry *h_src_dentry;
struct au_cp_generic cpg = {
.dentry = src_dentry,
.bdst = a->bdst,
.bsrc = a->bsrc,
.len = -1,
.pin = &a->pin,
.flags = AuCpup_DTIME | AuCpup_HOPEN /* | AuCpup_KEEPLINO */
};
di_read_lock_parent(a->src_parent, AuLock_IR);
err = au_test_and_cpup_dirs(src_dentry, a->bdst);
if (unlikely(err))
goto out;
/* NOTE(review): h_src_dentry is assigned here but never read afterwards */
h_src_dentry = au_h_dptr(src_dentry, a->bsrc);
err = au_pin(&a->pin, src_dentry, a->bdst,
au_opt_udba(src_dentry->d_sb),
AuPin_DI_LOCKED | AuPin_MNT_WRITE);
if (unlikely(err))
goto out;
err = au_sio_cpup_simple(&cpg);
au_unpin(&a->pin);
out:
di_read_unlock(a->src_parent, AuLock_IR);
return err;
}
/*
 * make the entry for @dentry on the branch a->bdst, for link(2) under the
 * plink option.
 * when the inode of @src_dentry does not exist (or is already unlinked) on
 * a->bdst, copy it up from a->bsrc under the name of @dentry. otherwise
 * link(2) to an existing alias (or the pseudo-link) on a->bdst.
 * on success, unless the pseudo-link was used as the source, the new entry is
 * appended to the pseudo-link list.
 * returns zero on success, otherwise a negative error.
 */
static int au_cpup_or_link(struct dentry *src_dentry, struct dentry *dentry,
			   struct au_link_args *a)
{
	int err;
	unsigned char plink;
	aufs_bindex_t bbot;
	struct dentry *h_src_dentry;
	struct inode *h_inode, *inode, *delegated;
	struct super_block *sb;
	struct file *h_file;

	plink = 0;
	h_inode = NULL;
	sb = src_dentry->d_sb;
	inode = d_inode(src_dentry);
	if (au_ibtop(inode) <= a->bdst)
		h_inode = au_h_iptr(inode, a->bdst);
	if (!h_inode || !h_inode->i_nlink) {
		/* copyup src_dentry as the name of dentry. */
		bbot = au_dbbot(dentry);
		if (bbot < a->bsrc)
			au_set_dbbot(dentry, a->bsrc);
		au_set_h_dptr(dentry, a->bsrc,
			      dget(au_h_dptr(src_dentry, a->bsrc)));
		/*
		 * keep our own reference to the current entry on bdst while
		 * the slot in dentry is emptied for the copy-up. it is either
		 * put (success) or handed back to dentry (failure) below.
		 */
		dget(a->h_path.dentry);
		au_set_h_dptr(dentry, a->bdst, NULL);
		AuDbg("temporary d_inode...\n");
		spin_lock(&dentry->d_lock);
		dentry->d_inode = d_inode(src_dentry); /* tmp */
		spin_unlock(&dentry->d_lock);
		h_file = au_h_open_pre(dentry, a->bsrc, /*force_wr*/0);
		if (IS_ERR(h_file)) {
			err = PTR_ERR(h_file);
			/*
			 * nothing was copied-up. hand the reference back to
			 * dentry as the copy-up failure path does, otherwise
			 * it would be leaked and the slot would stay empty.
			 */
			au_set_h_dptr(dentry, a->bdst, a->h_path.dentry);
		} else {
			struct au_cp_generic cpg = {
				.dentry	= dentry,
				.bdst	= a->bdst,
				.bsrc	= -1,
				.len	= -1,
				.pin	= &a->pin,
				.flags	= AuCpup_KEEPLINO
			};
			err = au_sio_cpup_simple(&cpg);
			au_h_open_post(dentry, a->bsrc, h_file);
			if (!err) {
				/* from now on, the copied-up one is the target */
				dput(a->h_path.dentry);
				a->h_path.dentry = au_h_dptr(dentry, a->bdst);
			} else
				au_set_h_dptr(dentry, a->bdst,
					      a->h_path.dentry);
		}
		spin_lock(&dentry->d_lock);
		dentry->d_inode = NULL; /* restore */
		spin_unlock(&dentry->d_lock);
		AuDbg("temporary d_inode...done\n");
		au_set_h_dptr(dentry, a->bsrc, NULL);
		au_set_dbbot(dentry, bbot);
	} else {
		/* the inode of src_dentry already exists on a.bdst branch */
		h_src_dentry = d_find_alias(h_inode);
		if (!h_src_dentry && au_plink_test(inode)) {
			plink = 1;
			h_src_dentry = au_plink_lkup(inode, a->bdst);
			err = PTR_ERR(h_src_dentry);
			if (IS_ERR(h_src_dentry))
				goto out;
			if (unlikely(d_is_negative(h_src_dentry))) {
				dput(h_src_dentry);
				h_src_dentry = NULL;
			}
		}
		if (h_src_dentry) {
			delegated = NULL;
			err = vfsub_link(h_src_dentry, au_pinned_h_dir(&a->pin),
					 &a->h_path, &delegated);
			if (unlikely(err == -EWOULDBLOCK)) {
				pr_warn("cannot retry for NFSv4 delegation"
					" for an internal link\n");
				iput(delegated);
			}
			dput(h_src_dentry);
		} else {
			AuIOErr("no dentry found for hi%lu on b%d\n",
				h_inode->i_ino, a->bdst);
			err = -EIO;
		}
	}

	if (!err && !plink)
		au_plink_append(inode, a->bdst, a->h_path.dentry);

out:
	AuTraceErr(err);
	return err;
}
/*
 * link(2).
 * decide and pin the writable branch, make the new entry on it (by a plain
 * link(2) on the branch, or by copying-up the source first), remove the
 * whiteout for the new name if there is one, and then update the aufs inode
 * and dentry.
 * returns zero on success, otherwise a negative error. on error, @dentry is
 * dropped.
 */
int aufs_link(struct dentry *src_dentry, struct inode *dir,
	      struct dentry *dentry)
{
	int err, rerr;
	struct au_dtime dt;
	struct au_link_args *a;
	struct dentry *wh_dentry, *h_src_dentry;
	struct inode *inode, *delegated;
	struct super_block *sb;
	struct au_wr_dir_args wr_dir_args = {
		/* .force_btgt	= -1, */
		.flags		= AuWrDir_ADD_ENTRY
	};

	IMustLock(dir);
	inode = d_inode(src_dentry);
	IMustLock(inode);

	err = -ENOMEM;
	a = kzalloc(sizeof(*a), GFP_NOFS);
	if (unlikely(!a))
		goto out;

	a->parent = dentry->d_parent; /* dir inode is locked */
	err = aufs_read_and_write_lock2(dentry, src_dentry,
					AuLock_NOPLM | AuLock_GEN);
	if (unlikely(err))
		goto out_kfree;
	err = au_d_linkable(src_dentry);
	if (unlikely(err))
		goto out_unlock;
	err = au_d_may_add(dentry);
	if (unlikely(err))
		goto out_unlock;

	a->src_parent = dget_parent(src_dentry);
	wr_dir_args.force_btgt = au_ibtop(inode);

	di_write_lock_parent(a->parent);
	wr_dir_args.force_btgt = au_wbr(dentry, wr_dir_args.force_btgt);
	/* on success, a->pin is held until out_unpin */
	wh_dentry = lock_hdir_lkup_wh(dentry, &dt, src_dentry, &a->pin,
				      &wr_dir_args);
	err = PTR_ERR(wh_dentry);
	if (IS_ERR(wh_dentry))
		goto out_parent;

	err = 0;
	sb = dentry->d_sb;
	a->bdst = au_dbtop(dentry);
	a->h_path.dentry = au_h_dptr(dentry, a->bdst);
	a->h_path.mnt = au_sbr_mnt(sb, a->bdst);
	a->bsrc = au_ibtop(inode);
	h_src_dentry = au_h_d_alias(src_dentry, a->bsrc);
	if (!h_src_dentry && au_di(src_dentry)->di_tmpfile)
		h_src_dentry = dget(au_hi_wh(inode, a->bsrc));
	if (!h_src_dentry) {
		a->bsrc = au_dbtop(src_dentry);
		h_src_dentry = au_h_d_alias(src_dentry, a->bsrc);
		AuDebugOn(!h_src_dentry);
	}
	if (IS_ERR(h_src_dentry)) {
		/*
		 * the branch dir is pinned and wh_dentry may be held here,
		 * so leave via out_unpin instead of out_parent.
		 */
		err = PTR_ERR(h_src_dentry);
		goto out_unpin;
	}

	/*
	 * aufs doesn't touch the credential so
	 * security_dentry_create_files_as() is unnecessary.
	 */
	if (au_opt_test(au_mntflags(sb), PLINK)) {
		if (a->bdst < a->bsrc
		    /* && h_src_dentry->d_sb != a->h_path.dentry->d_sb */)
			err = au_cpup_or_link(src_dentry, dentry, a);
		else {
			delegated = NULL;
			err = vfsub_link(h_src_dentry, au_pinned_h_dir(&a->pin),
					 &a->h_path, &delegated);
			if (unlikely(err == -EWOULDBLOCK)) {
				pr_warn("cannot retry for NFSv4 delegation"
					" for an internal link\n");
				iput(delegated);
			}
		}
		dput(h_src_dentry);
	} else {
		/*
		 * copyup src_dentry to the branch we process,
		 * and then link(2) to it.
		 */
		dput(h_src_dentry);
		if (a->bdst < a->bsrc
		    /* && h_src_dentry->d_sb != a->h_path.dentry->d_sb */) {
			/* the pin is released for the copy-up and re-taken */
			au_unpin(&a->pin);
			di_write_unlock(a->parent);
			err = au_cpup_before_link(src_dentry, a);
			di_write_lock_parent(a->parent);
			if (!err)
				err = au_pin(&a->pin, dentry, a->bdst,
					     au_opt_udba(sb),
					     AuPin_DI_LOCKED | AuPin_MNT_WRITE);
			/* the pin is not held on this error path */
			if (unlikely(err))
				goto out_wh;
		}
		if (!err) {
			h_src_dentry = au_h_dptr(src_dentry, a->bdst);
			err = -ENOENT;
			if (h_src_dentry && d_is_positive(h_src_dentry)) {
				delegated = NULL;
				err = vfsub_link(h_src_dentry,
						 au_pinned_h_dir(&a->pin),
						 &a->h_path, &delegated);
				if (unlikely(err == -EWOULDBLOCK)) {
					pr_warn("cannot retry"
						" for NFSv4 delegation"
						" for an internal link\n");
					iput(delegated);
				}
			}
		}
	}
	if (unlikely(err))
		goto out_unpin;

	if (wh_dentry) {
		/*
		 * NOTE(review): from here a->h_path.dentry is the whiteout,
		 * and both d_unhashed() below and vfsub_unlink() at
		 * out_revert operate on it instead of the new link -- confirm
		 * that this is intended.
		 */
		a->h_path.dentry = wh_dentry;
		err = au_wh_unlink_dentry(au_pinned_h_dir(&a->pin), &a->h_path,
					  dentry);
		if (unlikely(err))
			goto out_revert;
	}

	au_dir_ts(dir, a->bdst);
	inode_inc_iversion(dir);
	inc_nlink(inode);
	inode->i_ctime = dir->i_ctime;
	d_instantiate(dentry, au_igrab(inode));
	if (d_unhashed(a->h_path.dentry))
		/* some filesystem calls d_drop() */
		d_drop(dentry);
	/* some filesystems consume an inode even hardlink */
	au_fhsm_wrote(sb, a->bdst, /*force*/0);
	goto out_unpin; /* success */

out_revert:
	/* no delegation since it is just created */
	rerr = vfsub_unlink(au_pinned_h_dir(&a->pin), &a->h_path,
			    /*delegated*/NULL, /*force*/0);
	if (unlikely(rerr)) {
		AuIOErr("%pd reverting failed(%d, %d)\n", dentry, err, rerr);
		err = -EIO;
	}
	au_dtime_revert(&dt);
out_unpin:
	au_unpin(&a->pin);
out_wh:
	dput(wh_dentry);
out_parent:
	di_write_unlock(a->parent);
	dput(a->src_parent);
out_unlock:
	if (unlikely(err)) {
		au_update_dbtop(dentry);
		d_drop(dentry);
	}
	aufs_read_and_write_unlock2(dentry, src_dentry);
out_kfree:
	au_kfree_rcu(a);
out:
	AuTraceErr(err);
	return err;
}
/*
 * mkdir(2).
 * decide and pin the writable branch, make the dir on it, make it opaque when
 * a whiteout existed for the name or the ALWAYS_DIROPQ option is set, and
 * finish with epilog().
 * when a later step fails, the opaque mark and the dir are removed again.
 * returns zero on success, otherwise a negative error. on error, @dentry is
 * dropped.
 */
int aufs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
{
int err, rerr;
aufs_bindex_t bindex;
unsigned char diropq;
struct path h_path;
struct dentry *wh_dentry, *parent, *opq_dentry;
struct inode *h_inode;
struct super_block *sb;
struct {
struct au_pin pin;
struct au_dtime dt;
} *a; /* reduce the stack usage */
struct au_wr_dir_args wr_dir_args = {
.force_btgt = -1,
.flags = AuWrDir_ADD_ENTRY | AuWrDir_ISDIR
};
IMustLock(dir);
err = -ENOMEM;
a = kmalloc(sizeof(*a), GFP_NOFS);
if (unlikely(!a))
goto out;
err = aufs_read_lock(dentry, AuLock_DW | AuLock_GEN);
if (unlikely(err))
goto out_free;
err = au_d_may_add(dentry);
if (unlikely(err))
goto out_unlock;
parent = dentry->d_parent; /* dir inode is locked */
di_write_lock_parent(parent);
wh_dentry = lock_hdir_lkup_wh(dentry, &a->dt, /*src_dentry*/NULL,
&a->pin, &wr_dir_args);
err = PTR_ERR(wh_dentry);
if (IS_ERR(wh_dentry))
goto out_parent;
sb = dentry->d_sb;
bindex = au_dbtop(dentry);
h_path.dentry = au_h_dptr(dentry, bindex);
h_path.mnt = au_sbr_mnt(sb, bindex);
err = vfsub_mkdir(au_pinned_h_dir(&a->pin), &h_path, mode);
if (unlikely(err))
goto out_unpin;
/* make the dir opaque */
diropq = 0;
h_inode = d_inode(h_path.dentry);
if (wh_dentry
|| au_opt_test(au_mntflags(sb), ALWAYS_DIROPQ)) {
/* the new dir on the branch is locked while the mark is created */
inode_lock_nested(h_inode, AuLsc_I_CHILD);
opq_dentry = au_diropq_create(dentry, bindex);
inode_unlock(h_inode);
err = PTR_ERR(opq_dentry);
if (IS_ERR(opq_dentry))
goto out_dir;
dput(opq_dentry);
diropq = 1;
}
err = epilog(dir, bindex, wh_dentry, dentry);
if (!err) {
/* a new sub-dir adds a link to its parent */
inc_nlink(dir);
goto out_unpin; /* success */
}
/* revert */
if (diropq) {
AuLabel(revert opq);
inode_lock_nested(h_inode, AuLsc_I_CHILD);
rerr = au_diropq_remove(dentry, bindex);
inode_unlock(h_inode);
if (rerr) {
AuIOErr("%pd reverting diropq failed(%d, %d)\n",
dentry, err, rerr);
err = -EIO;
}
}
out_dir:
AuLabel(revert dir);
rerr = vfsub_rmdir(au_pinned_h_dir(&a->pin), &h_path);
if (rerr) {
AuIOErr("%pd reverting dir failed(%d, %d)\n",
dentry, err, rerr);
err = -EIO;
}
au_dtime_revert(&a->dt);
out_unpin:
au_unpin(&a->pin);
dput(wh_dentry);
out_parent:
di_write_unlock(parent);
out_unlock:
if (unlikely(err)) {
au_update_dbtop(dentry);
d_drop(dentry);
}
aufs_read_unlock(dentry, AuLock_DW);
out_free:
au_kfree_rcu(a);
out:
return err;
}

513
fs/aufs/i_op_del.c Normal file
View File

@ -0,0 +1,513 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2005-2020 Junjiro R. Okajima
*
* This program, aufs is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* inode operations (del entry)
*/
#include <linux/iversion.h>
#include "aufs.h"
/*
 * decide if a new whiteout for @dentry is necessary or not.
 * when it is necessary, prepare the parent dir for the upper branch whose
 * branch index is @bcpup for creation. the actual creation of the whiteout will
 * be done by caller.
 * when *@bcpup is negative on entry, the branch is decided here and stored
 * into it.
 * NOTE(review): @isdir is not referenced in the body.
 * return value:
 * 0: wh is unnecessary
 * plus: wh is necessary
 * minus: error
 */
int au_wr_dir_need_wh(struct dentry *dentry, int isdir, aufs_bindex_t *bcpup)
{
int need_wh, err;
aufs_bindex_t btop;
struct super_block *sb;
sb = dentry->d_sb;
btop = au_dbtop(dentry);
if (*bcpup < 0) {
/* the top branch of dentry, unless it is readonly */
*bcpup = btop;
if (au_test_ro(sb, btop, d_inode(dentry))) {
err = AuWbrCopyup(au_sbi(sb), dentry);
*bcpup = err;
if (unlikely(err < 0))
goto out;
}
} else
AuDebugOn(btop < *bcpup
|| au_test_ro(sb, *bcpup, d_inode(dentry)));
AuDbg("bcpup %d, btop %d\n", *bcpup, btop);
if (*bcpup != btop) {
/* the entry stays on btop, so it has to be hidden by a whiteout */
err = au_cpup_dirs(dentry, *bcpup);
if (unlikely(err))
goto out;
need_wh = 1;
} else {
struct au_dinfo *dinfo, *tmp;
need_wh = -ENOMEM;
dinfo = au_di(dentry);
tmp = au_di_alloc(sb, AuLsc_DI_TMP);
if (tmp) {
/*
 * lookup the branches below btop with a temporary copy of
 * dinfo swapped in, and swap the original back afterwards.
 */
au_di_cp(tmp, dinfo);
au_di_swap(tmp, dinfo);
/* returns the number of positive dentries */
need_wh = au_lkup_dentry(dentry, btop + 1,
/* AuLkup_IGNORE_PERM */ 0);
au_di_swap(tmp, dinfo);
au_rw_write_unlock(&tmp->di_rwsem);
au_di_free(tmp);
}
}
AuDbg("need_wh %d\n", need_wh);
err = need_wh;
out:
return err;
}
/*
 * simple tests for the del-entry operations.
 * following the checks in vfs, plus the parent-child relationship.
 *
 * returns zero when the entry on the branch @bindex may be removed, otherwise
 * -ENOENT, -EISDIR, -ENOTDIR, -EACCES or -EIO as decided below.
 */
int au_may_del(struct dentry *dentry, aufs_bindex_t bindex,
struct dentry *h_parent, int isdir)
{
int err;
umode_t h_mode;
struct dentry *h_dentry, *h_latest;
struct inode *h_inode;
h_dentry = au_h_dptr(dentry, bindex);
if (d_really_is_positive(dentry)) {
/* the entry on the branch must exist and still be linked */
err = -ENOENT;
if (unlikely(d_is_negative(h_dentry)))
goto out;
h_inode = d_inode(h_dentry);
if (unlikely(!h_inode->i_nlink))
goto out;
/* the type on the branch must match what the caller expects */
h_mode = h_inode->i_mode;
if (!isdir) {
err = -EISDIR;
if (unlikely(S_ISDIR(h_mode)))
goto out;
} else if (unlikely(!S_ISDIR(h_mode))) {
err = -ENOTDIR;
goto out;
}
} else {
/* rename(2) case */
err = -EIO;
if (unlikely(d_is_positive(h_dentry)))
goto out;
}
err = -ENOENT;
/* expected parent dir is locked */
if (unlikely(h_parent != h_dentry->d_parent))
goto out;
/* NOTE(review): this store is dead, err is overwritten just below */
err = 0;
/*
 * rmdir a dir may break the consistency on some filesystem.
 * let's try heavy test.
 */
err = -EACCES;
if (unlikely(!au_opt_test(au_mntflags(dentry->d_sb), DIRPERM1)
&& au_test_h_perm(d_inode(h_parent),
MAY_EXEC | MAY_WRITE)))
goto out;
/* lookup the name again and make sure it is still the same dentry */
h_latest = au_sio_lkup_one(&dentry->d_name, h_parent);
err = -EIO;
if (IS_ERR(h_latest))
goto out;
if (h_latest == h_dentry)
err = 0;
dput(h_latest);
out:
return err;
}
/*
 * decide the branch where we operate for @dentry. the branch index will be set
 * @rbcpup. after deciding it, 'pin' it and store the timestamps of the parent
 * dir for reverting.
 * when a new whiteout is necessary, create it.
 *
 * return value:
 * ERR_PTR: error. the pin is not held (never taken or already released).
 * NULL: success and no whiteout was necessary. the pin is held.
 * others: the created whiteout. the pin is held.
 */
static struct dentry*
lock_hdir_create_wh(struct dentry *dentry, int isdir, aufs_bindex_t *rbcpup,
struct au_dtime *dt, struct au_pin *pin)
{
struct dentry *wh_dentry;
struct super_block *sb;
struct path h_path;
int err, need_wh;
unsigned int udba;
aufs_bindex_t bcpup;
need_wh = au_wr_dir_need_wh(dentry, isdir, rbcpup);
wh_dentry = ERR_PTR(need_wh);
if (unlikely(need_wh < 0))
goto out;
sb = dentry->d_sb;
udba = au_opt_udba(sb);
bcpup = *rbcpup;
err = au_pin(pin, dentry, bcpup, udba,
AuPin_DI_LOCKED | AuPin_MNT_WRITE);
wh_dentry = ERR_PTR(err);
if (unlikely(err))
goto out;
h_path.dentry = au_pinned_h_parent(pin);
/* the test runs only with udba enabled and on dentry's top branch */
if (udba != AuOpt_UDBA_NONE
&& au_dbtop(dentry) == bcpup) {
err = au_may_del(dentry, bcpup, h_path.dentry, isdir);
wh_dentry = ERR_PTR(err);
if (unlikely(err))
goto out_unpin;
}
h_path.mnt = au_sbr_mnt(sb, bcpup);
au_dtime_store(dt, au_pinned_parent(pin), &h_path);
wh_dentry = NULL;
if (!need_wh)
goto out; /* success, no need to create whiteout */
wh_dentry = au_wh_create(dentry, bcpup, h_path.dentry);
if (IS_ERR(wh_dentry))
goto out_unpin;
/* returns with the parent is locked and wh_dentry is dget-ed */
goto out; /* success */
out_unpin:
au_unpin(pin);
out:
return wh_dentry;
}
/*
 * when removing a dir, rename it to a unique temporary whiteout-ed name first
 * in order to be revertible and to save time for removing many child whiteouts
 * under the dir.
 * returns 1 when there are too many child whiteouts and the caller should
 * remove them asynchronously. returns 0 when the number of children is small
 * enough to remove now or the branch fs is a remote fs.
 * otherwise returns an error.
 */
static int renwh_and_rmdir(struct dentry *dentry, aufs_bindex_t bindex,
struct au_nhash *whlist, struct inode *dir)
{
int rmdir_later, err, dirwh;
struct dentry *h_dentry;
struct super_block *sb;
struct inode *inode;
sb = dentry->d_sb;
SiMustAnyLock(sb);
h_dentry = au_h_dptr(dentry, bindex);
err = au_whtmp_ren(h_dentry, au_sbr(sb, bindex));
if (unlikely(err))
goto out;
/* stop monitoring */
inode = d_inode(dentry);
au_hn_free(au_hi(inode, bindex));
if (!au_test_fs_remote(h_dentry->d_sb)) {
/* si_dirwh <= 1 always defers, otherwise the whiteout list decides */
dirwh = au_sbi(sb)->si_dirwh;
rmdir_later = (dirwh <= 1);
if (!rmdir_later)
rmdir_later = au_nhash_test_longer_wh(whlist, bindex,
dirwh);
if (rmdir_later)
return rmdir_later;
}
err = au_whtmp_rmdir(dir, bindex, h_dentry, whlist);
if (unlikely(err)) {
/* the dir is renamed already, so this failure is only reported */
AuIOErr("rmdir %pd, b%d failed, %d. ignored\n",
h_dentry, bindex, err);
err = 0;
}
out:
AuTraceErr(err);
return err;
}
/*
 * final procedure for deleting an entry.
 * maintain dentry and iattr: drop @dentry, copy the ctime of @dir to the
 * removed inode, and then update the times and the version of @dir.
 */
static void epilog(struct inode *dir, struct dentry *dentry,
aufs_bindex_t bindex)
{
struct inode *inode;
inode = d_inode(dentry);
d_drop(dentry);
/* this copy is made before au_dir_ts() is called for dir */
inode->i_ctime = dir->i_ctime;
au_dir_ts(dir, bindex);
inode_inc_iversion(dir);
}
/*
 * the revert procedure for a failed del-entry operation.
 * unlink the whiteout which was created on the branch @bindex, and then
 * restore dbwh of @dentry to @bwh and the parent dir's times from @dt.
 * @err is the original error and is used only for the message.
 * returns zero when reverted, or -EIO when the whiteout could not be removed.
 */
static int do_revert(int err, struct inode *dir, aufs_bindex_t bindex,
		     aufs_bindex_t bwh, struct dentry *wh_dentry,
		     struct dentry *dentry, struct au_dtime *dt)
{
	int rerr;
	struct path h_path = {
		.dentry	= wh_dentry,
		.mnt	= au_sbr_mnt(dir->i_sb, bindex)
	};

	rerr = au_wh_unlink_dentry(au_h_iptr(dir, bindex), &h_path, dentry);
	if (rerr) {
		AuIOErr("%pd reverting whiteout failed(%d, %d)\n",
			dentry, err, rerr);
		return -EIO;
	}

	/* the whiteout is gone, put the rest back as it was */
	au_set_dbwh(dentry, bwh);
	au_dtime_revert(dt);
	return 0;
}
/* ---------------------------------------------------------------------- */
/*
 * unlink(2).
 * decide and pin the branch, create a whiteout when necessary, and unlink the
 * entry on the branch when it is on the branch we operate on. when the entry
 * is on another branch, the whiteout alone hides it.
 * when the unlink fails, the created whiteout is removed again.
 * returns zero on success, otherwise a negative error.
 */
int aufs_unlink(struct inode *dir, struct dentry *dentry)
{
int err;
aufs_bindex_t bwh, bindex, btop;
struct inode *inode, *h_dir, *delegated;
struct dentry *parent, *wh_dentry;
/* to reduce stack size */
struct {
struct au_dtime dt;
struct au_pin pin;
struct path h_path;
} *a;
IMustLock(dir);
err = -ENOMEM;
a = kmalloc(sizeof(*a), GFP_NOFS);
if (unlikely(!a))
goto out;
err = aufs_read_lock(dentry, AuLock_DW | AuLock_GEN);
if (unlikely(err))
goto out_free;
err = au_d_hashed_positive(dentry);
if (unlikely(err))
goto out_unlock;
inode = d_inode(dentry);
IMustLock(inode);
err = -EISDIR;
if (unlikely(d_is_dir(dentry)))
goto out_unlock; /* possible? */
btop = au_dbtop(dentry);
/* remember dbwh for do_revert() */
bwh = au_dbwh(dentry);
/* negative means lock_hdir_create_wh() decides the branch */
bindex = -1;
parent = dentry->d_parent; /* dir inode is locked */
di_write_lock_parent(parent);
wh_dentry = lock_hdir_create_wh(dentry, /*isdir*/0, &bindex, &a->dt,
&a->pin);
err = PTR_ERR(wh_dentry);
if (IS_ERR(wh_dentry))
goto out_parent;
a->h_path.mnt = au_sbr_mnt(dentry->d_sb, btop);
a->h_path.dentry = au_h_dptr(dentry, btop);
/* hold the entry on the branch until out_unpin */
dget(a->h_path.dentry);
if (bindex == btop) {
h_dir = au_pinned_h_dir(&a->pin);
delegated = NULL;
err = vfsub_unlink(h_dir, &a->h_path, &delegated, /*force*/0);
if (unlikely(err == -EWOULDBLOCK)) {
pr_warn("cannot retry for NFSv4 delegation"
" for an internal unlink\n");
iput(delegated);
}
} else {
/* dir inode is locked */
h_dir = d_inode(wh_dentry->d_parent);
IMustLock(h_dir);
err = 0;
}
if (!err) {
vfsub_drop_nlink(inode);
epilog(dir, dentry, bindex);
/* update target timestamps */
if (bindex == btop) {
vfsub_update_h_iattr(&a->h_path, /*did*/NULL);
/*ignore*/
inode->i_ctime = d_inode(a->h_path.dentry)->i_ctime;
} else
/* todo: this timestamp may be reverted later */
inode->i_ctime = h_dir->i_ctime;
goto out_unpin; /* success */
}
/* revert */
if (wh_dentry) {
int rerr;
rerr = do_revert(err, dir, bindex, bwh, wh_dentry, dentry,
&a->dt);
if (rerr)
err = rerr;
}
out_unpin:
au_unpin(&a->pin);
dput(wh_dentry);
dput(a->h_path.dentry);
out_parent:
di_write_unlock(parent);
out_unlock:
aufs_read_unlock(dentry, AuLock_DW);
out_free:
au_kfree_rcu(a);
out:
return err;
}
/*
 * Remove the directory @dentry from the aufs directory @dir.
 *
 * Returns zero on success or a negative errno: -ENOMEM when one of the two
 * allocations fails, -ENOTDIR when @dentry is not a directory, or the error
 * returned by one of the helpers called below (au_test_empty() among them).
 * When renwh_and_rmdir() asks for a deferred removal (positive return), the
 * lower directory is handed to au_whtmp_kick_rmdir() together with @args,
 * and @args is then no longer freed here.
 */
int aufs_rmdir(struct inode *dir, struct dentry *dentry)
{
int err, rmdir_later;
aufs_bindex_t bwh, bindex, btop;
struct inode *inode;
struct dentry *parent, *wh_dentry, *h_dentry;
struct au_whtmp_rmdir *args;
/* to reduce stack size */
struct {
struct au_dtime dt;
struct au_pin pin;
} *a;
IMustLock(dir);
err = -ENOMEM;
a = kmalloc(sizeof(*a), GFP_NOFS);
if (unlikely(!a))
goto out;
err = aufs_read_lock(dentry, AuLock_DW | AuLock_FLUSH | AuLock_GEN);
if (unlikely(err))
goto out_free;
err = au_alive_dir(dentry);
if (unlikely(err))
goto out_unlock;
inode = d_inode(dentry);
IMustLock(inode);
err = -ENOTDIR;
if (unlikely(!d_is_dir(dentry)))
goto out_unlock; /* possible? */
err = -ENOMEM;
args = au_whtmp_rmdir_alloc(dir->i_sb, GFP_NOFS);
if (unlikely(!args))
goto out_unlock;
parent = dentry->d_parent; /* dir inode is locked */
di_write_lock_parent(parent);
/* au_test_empty() is given args->whlist, which the removal below reuses */
err = au_test_empty(dentry, &args->whlist);
if (unlikely(err))
goto out_parent;
/* remember the current top and whiteout branch; bwh is needed by the revert */
btop = au_dbtop(dentry);
bwh = au_dbwh(dentry);
/* bindex is handed to lock_hdir_create_wh() by address and tested below */
bindex = -1;
wh_dentry = lock_hdir_create_wh(dentry, /*isdir*/1, &bindex, &a->dt,
&a->pin);
err = PTR_ERR(wh_dentry);
if (IS_ERR(wh_dentry))
goto out_parent;
h_dentry = au_h_dptr(dentry, btop);
/* keep the lower dentry until the dput() at out_unpin */
dget(h_dentry);
rmdir_later = 0;
if (bindex == btop) {
err = renwh_and_rmdir(dentry, btop, &args->whlist, dir);
/* a positive value is not an error but a request to remove later */
if (err > 0) {
rmdir_later = err;
err = 0;
}
} else {
/* stop monitoring */
au_hn_free(au_hi(inode, btop));
/* dir inode is locked */
IMustLock(d_inode(wh_dentry->d_parent));
err = 0;
}
if (!err) {
vfsub_dead_dir(inode);
au_set_dbdiropq(dentry, -1);
epilog(dir, dentry, bindex);
if (rmdir_later) {
au_whtmp_kick_rmdir(dir, btop, h_dentry, args);
/* args was passed on; keep out_parent from freeing it */
args = NULL;
}
goto out_unpin; /* success */
}
/* revert */
AuLabel(revert);
/* wh_dentry may be NULL; then there is no whiteout to take back */
if (wh_dentry) {
int rerr;
rerr = do_revert(err, dir, bindex, bwh, wh_dentry, dentry,
&a->dt);
if (rerr)
err = rerr;
}
out_unpin:
au_unpin(&a->pin);
dput(wh_dentry);
dput(h_dentry);
out_parent:
di_write_unlock(parent);
if (args)
au_whtmp_rmdir_free(args);
out_unlock:
aufs_read_unlock(dentry, AuLock_DW);
out_free:
au_kfree_rcu(a);
out:
AuTraceErr(err);
return err;
}

1250
fs/aufs/i_op_ren.c Normal file

File diff suppressed because it is too large Load Diff

286
fs/aufs/iinfo.c Normal file
View File

@ -0,0 +1,286 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2005-2020 Junjiro R. Okajima
*
* This program, aufs is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* inode private data
*/
#include "aufs.h"
/*
 * Return the lower inode recorded for @inode at branch index @bindex, or
 * NULL when that slot is empty. The reference count is not changed.
 */
struct inode *au_h_iptr(struct inode *inode, aufs_bindex_t bindex)
{
	struct inode *lower;

	IiMustAnyLock(inode);

	lower = au_hinode(au_ii(inode), bindex)->hi_inode;
	AuDebugOn(lower && atomic_read(&lower->i_count) <= 0);
	return lower;
}
/* todo: hard/soft set? */
/*
 * Release what @hinode holds: its notification state, the reference to the
 * whiteout dentry and the reference to the lower inode. The pointers stored
 * in @hinode are not cleared here.
 */
void au_hiput(struct au_hinode *hinode)
{
	struct dentry *h_wh;
	struct inode *h_inode;

	au_hn_free(hinode);

	h_wh = hinode->hi_whdentry;
	dput(h_wh);

	h_inode = hinode->hi_inode;
	iput(h_inode);
}
/*
 * Build the AuHi_* flags for au_set_h_iptr() from the mount options of the
 * superblock of @inode: XINO when the XINO option is set, and HNOTIFY when
 * @isdir is non-zero and the UDBA_HNOTIFY option is set.
 */
unsigned int au_hi_flags(struct inode *inode, int isdir)
{
	unsigned int hi_flags = 0;
	const unsigned int opts = au_mntflags(inode->i_sb);

	if (au_opt_test(opts, XINO))
		au_fset_hi(hi_flags, XINO);

	if (!isdir)
		return hi_flags;

	if (au_opt_test(opts, UDBA_HNOTIFY))
		au_fset_hi(hi_flags, HNOTIFY);
	return hi_flags;
}
/*
 * Store @h_inode as the lower inode of @inode at branch index @bindex.
 *
 * Whatever the slot held before is released through au_hiput(). A NULL
 * @h_inode simply empties the slot. Otherwise the id of the branch is
 * recorded and, depending on @flags (AuHi_*), the inode number mapping is
 * written and notification is set up. Failures of those two steps are only
 * logged.
 */
void au_set_h_iptr(struct inode *inode, aufs_bindex_t bindex,
		   struct inode *h_inode, unsigned int flags)
{
	int err;
	struct au_iinfo *iinfo = au_ii(inode);
	struct au_hinode *slot;
	struct au_branch *br;
	struct super_block *sb;

	IiMustWriteLock(inode);

	slot = au_hinode(iinfo, bindex);
	AuDebugOn(h_inode && atomic_read(&h_inode->i_count) <= 0);

	/* drop the previous occupant first */
	if (slot->hi_inode)
		au_hiput(slot);
	slot->hi_inode = h_inode;
	if (!h_inode)
		return;

	sb = inode->i_sb;
	AuDebugOn(inode->i_mode
		  && (h_inode->i_mode & S_IFMT)
		  != (inode->i_mode & S_IFMT));

	if (bindex == iinfo->ii_btop)
		au_cpup_igen(inode, h_inode);

	br = au_sbr(sb, bindex);
	slot->hi_id = br->br_id;

	if (au_ftest_hi(flags, XINO)) {
		err = au_xino_write(sb, bindex, h_inode->i_ino,
				    inode->i_ino);
		if (unlikely(err))
			AuIOErr1("failed au_xino_write() %d\n", err);
	}

	if (au_ftest_hi(flags, HNOTIFY)
	    && au_br_hnotifyable(br->br_perm)) {
		err = au_hn_alloc(slot, inode);
		if (unlikely(err))
			AuIOErr1("au_hn_alloc() %d\n", err);
	}
}
/*
 * Record @h_wh as the whiteout dentry of @inode at branch index @bindex.
 * The slot must not hold a whiteout dentry yet. No reference is taken here;
 * au_hiput() later calls dput() on the stored pointer.
 */
void au_set_hi_wh(struct inode *inode, aufs_bindex_t bindex,
		  struct dentry *h_wh)
{
	struct au_hinode *slot;

	IiMustWriteLock(inode);

	slot = au_hinode(au_ii(inode), bindex);
	AuDebugOn(slot->hi_whdentry);
	slot->hi_whdentry = h_wh;
}
void au_update_iigen(struct inode *inode, int half)
{
struct au_iinfo *iinfo;
struct au_iigen *iigen;
unsigned int sigen;
sigen = au_sigen(inode->i_sb);
iinfo = au_ii(inode);
iigen = &iinfo->ii_generation;
spin_lock(&iigen->ig_spin);
iigen->ig_generation = sigen;
if (half)
au_ig_fset(iigen->ig_flags, HALF_REFRESHED);
else
au_ig_fclr(iigen->ig_flags, HALF_REFRESHED);
spin_unlock(&iigen->ig_spin);
}
/*
 * Recompute ii_btop/ii_bbot of @inode as the first and the last branch index
 * (within 0..au_sbbot()) whose slot holds a lower inode; both become -1 when
 * no slot is occupied.
 *
 * With @do_put_zero, lower inodes in the old range that have no link left
 * and are not I_LINKABLE are released first.
 *
 * it may be called at remount time, too
 */
void au_update_ibrange(struct inode *inode, int do_put_zero)
{
	struct au_iinfo *iinfo;
	struct inode *h_inode;
	aufs_bindex_t idx, last;

	AuDebugOn(au_is_bad_inode(inode));
	IiMustWriteLock(inode);

	iinfo = au_ii(inode);
	if (do_put_zero && iinfo->ii_btop >= 0) {
		idx = iinfo->ii_btop;
		while (idx <= iinfo->ii_bbot) {
			h_inode = au_hinode(iinfo, idx)->hi_inode;
			if (h_inode
			    && !h_inode->i_nlink
			    && !(h_inode->i_state & I_LINKABLE))
				au_set_h_iptr(inode, idx, NULL, 0);
			idx++;
		}
	}

	iinfo->ii_btop = -1;
	iinfo->ii_bbot = -1;
	last = au_sbbot(inode->i_sb);

	/* lowest occupied index */
	for (idx = 0; idx <= last; idx++) {
		if (!au_hinode(iinfo, idx)->hi_inode)
			continue;
		iinfo->ii_btop = idx;
		break;
	}

	/* highest occupied index, searched only when a lowest one exists */
	if (iinfo->ii_btop >= 0) {
		for (idx = last; idx >= iinfo->ii_btop; idx--) {
			if (!au_hinode(iinfo, idx)->hi_inode)
				continue;
			iinfo->ii_bbot = idx;
			break;
		}
	}

	AuDebugOn(iinfo->ii_btop > iinfo->ii_bbot);
}
/* ---------------------------------------------------------------------- */
/*
 * One-time initialisation of a struct au_icntnr passed as @_c: the
 * generation spinlock, the iinfo rwsem and the embedded VFS inode.
 */
void au_icntnr_init_once(void *_c)
{
	struct au_icntnr *cntnr = _c;
	struct au_iinfo *iinfo;

	iinfo = &cntnr->iinfo;
	/* the lock expressions are kept as they are on purpose */
	spin_lock_init(&iinfo->ii_generation.ig_spin);
	au_rw_init(&iinfo->ii_rwsem);

	inode_init_once(&cntnr->vfs_inode);
}
/*
 * Put @hinode into its empty state: no lower inode, branch id -1, no
 * whiteout dentry, and whatever au_hn_init() sets up for notification.
 */
void au_hinode_init(struct au_hinode *hinode)
{
hinode->hi_inode = NULL;
hinode->hi_id = -1;
au_hn_init(hinode);
hinode->hi_whdentry = NULL;
}
int au_iinfo_init(struct inode *inode)
{
struct au_iinfo *iinfo;
struct super_block *sb;
struct au_hinode *hi;
int nbr, i;
sb = inode->i_sb;
iinfo = &(container_of(inode, struct au_icntnr, vfs_inode)->iinfo);
nbr = au_sbbot(sb) + 1;
if (unlikely(nbr <= 0))
nbr = 1;
hi = kmalloc_array(nbr, sizeof(*iinfo->ii_hinode), GFP_NOFS);
if (hi) {
au_lcnt_inc(&au_sbi(sb)->si_ninodes);
iinfo->ii_hinode = hi;
for (i = 0; i < nbr; i++, hi++)
au_hinode_init(hi);
iinfo->ii_generation.ig_generation = au_sigen(sb);
iinfo->ii_btop = -1;
iinfo->ii_bbot = -1;
iinfo->ii_vdir = NULL;
return 0;
}
return -ENOMEM;
}
/*
 * Resize the lower inode array of @iinfo to @nbr entries through
 * au_krealloc() (@may_shrink is passed on) and initialise every entry above
 * ii_bbot.
 *
 * Returns zero, or -ENOMEM in which case the old array is left in place.
 */
int au_hinode_realloc(struct au_iinfo *iinfo, int nbr, int may_shrink)
{
	int i;
	struct au_hinode *hip;

	AuRwMustWriteLock(&iinfo->ii_rwsem);

	hip = au_krealloc(iinfo->ii_hinode, sizeof(*hip) * nbr, GFP_NOFS,
			  may_shrink);
	if (!hip)
		return -ENOMEM;

	iinfo->ii_hinode = hip;
	for (i = iinfo->ii_bbot + 1; i < nbr; i++)
		au_hinode_init(hip + i);
	return 0;
}
/*
 * Tear down the aufs private part of @inode: decrement the inode counter of
 * the superblock, drop the inode number mapping, free the vdir, release
 * every lower inode between ii_btop and ii_bbot, and free the array.
 */
void au_iinfo_fin(struct inode *inode)
{
	const unsigned char unlinked = !inode->i_nlink;
	struct super_block *sb;
	struct au_iinfo *iinfo;
	struct au_hinode *hi;
	aufs_bindex_t bindex, bbot;

	AuDebugOn(au_is_bad_inode(inode));

	sb = inode->i_sb;
	au_lcnt_dec(&au_sbi(sb)->si_ninodes);
	if (!si_pid_test(sb)) {
		/*
		 * it is safe to hide the dependency between sbinfo and
		 * sb->s_umount.
		 */
		lockdep_off();
		si_noflush_read_lock(sb);
		au_xino_delete_inode(inode, unlinked);
		si_read_unlock(sb);
		lockdep_on();
	} else {
		au_xino_delete_inode(inode, unlinked);
	}

	iinfo = au_ii(inode);
	if (iinfo->ii_vdir)
		au_vdir_free(iinfo->ii_vdir);

	bindex = iinfo->ii_btop;
	if (bindex >= 0) {
		hi = au_hinode(iinfo, bindex);
		bbot = iinfo->ii_bbot;
		for (; bindex <= bbot; bindex++, hi++) {
			if (hi->hi_inode)
				au_hiput(hi);
		}
	}

	au_kfree_rcu(iinfo->ii_hinode);
	AuRwDestroy(&iinfo->ii_rwsem);
}

529
fs/aufs/inode.c Normal file
View File

@ -0,0 +1,529 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2005-2020 Junjiro R. Okajima
*
* This program, aufs is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* inode functions
*/
#include <linux/iversion.h>
#include "aufs.h"
struct inode *au_igrab(struct inode *inode)
{
if (inode) {
AuDebugOn(!atomic_read(&inode->i_count));
ihold(inode);
}
return inode;
}
/*
 * Finish a refresh of @inode: copy the attributes up (not forced), mark the
 * generation as half refreshed, and bump i_version when @do_version is set.
 */
static void au_refresh_hinode_attr(struct inode *inode, int do_version)
{
	au_cpup_attr_all(inode, /*force*/0);
	au_update_iigen(inode, /*half*/1);

	if (!do_version)
		return;
	inode_inc_iversion(inode);
}
/*
 * Rearrange the lower inode array of @inode so that every occupied entry
 * sits at the index au_br_index() currently reports for its branch id.
 *
 * *@update is set to 1 when an entry had to be dropped because
 * au_br_index() returned a negative index for its branch id; otherwise it
 * stays 0.
 *
 * Returns zero, the error of the first au_hinode_realloc(), or the error of
 * au_dy_irefresh().
 */
static int au_ii_refresh(struct inode *inode, int *update)
{
int err, e, nbr;
umode_t type;
aufs_bindex_t bindex, new_bindex;
struct super_block *sb;
struct au_iinfo *iinfo;
struct au_hinode *p, *q, tmp;
AuDebugOn(au_is_bad_inode(inode));
IiMustWriteLock(inode);
*update = 0;
sb = inode->i_sb;
nbr = au_sbbot(sb) + 1;
type = inode->i_mode & S_IFMT;
iinfo = au_ii(inode);
/* make room for every current branch before moving entries around */
err = au_hinode_realloc(iinfo, nbr, /*may_shrink*/0);
if (unlikely(err))
goto out;
AuDebugOn(iinfo->ii_btop < 0);
p = au_hinode(iinfo, iinfo->ii_btop);
for (bindex = iinfo->ii_btop; bindex <= iinfo->ii_bbot;
bindex++, p++) {
if (!p->hi_inode)
continue;
AuDebugOn(type != (p->hi_inode->i_mode & S_IFMT));
new_bindex = au_br_index(sb, p->hi_id);
/* already at the right place */
if (new_bindex == bindex)
continue;
/* no index for this branch id: release the entry */
if (new_bindex < 0) {
*update = 1;
au_hiput(p);
p->hi_inode = NULL;
continue;
}
/* widen the range so that the loop also covers the new position */
if (new_bindex < iinfo->ii_btop)
iinfo->ii_btop = new_bindex;
if (iinfo->ii_bbot < new_bindex)
iinfo->ii_bbot = new_bindex;
/* swap two lower inodes, and loop again */
q = au_hinode(iinfo, new_bindex);
tmp = *q;
*q = *p;
*p = tmp;
/* the entry swapped in is occupied: examine this index once more */
if (tmp.hi_inode) {
bindex--;
p--;
}
}
au_update_ibrange(inode, /*do_put_zero*/0);
au_hinode_realloc(iinfo, nbr, /*may_shrink*/1); /* harmless if err */
e = au_dy_irefresh(inode);
if (unlikely(e && !err))
err = e;
out:
AuTraceErr(err);
return err;
}
void au_refresh_iop(struct inode *inode, int force_getattr)
{
int type;
struct au_sbinfo *sbi = au_sbi(inode->i_sb);
const struct inode_operations *iop
= force_getattr ? aufs_iop : sbi->si_iop_array;
if (inode->i_op == iop)
return;
switch (inode->i_mode & S_IFMT) {
case S_IFDIR:
type = AuIop_DIR;
break;
case S_IFLNK:
type = AuIop_SYMLINK;
break;
default:
type = AuIop_OTHER;
break;
}
inode->i_op = iop + type;
/* unnecessary smp_wmb() */
}
/*
 * Refresh @inode from its own lower inode array only. On success the
 * attributes are refreshed too, and i_version is bumped when @inode is a
 * directory and au_ii_refresh() reported an update.
 *
 * Returns zero or the error of au_ii_refresh().
 */
int au_refresh_hinode_self(struct inode *inode)
{
	int err, update, do_version;

	err = au_ii_refresh(inode, &update);
	if (!err) {
		do_version = update && S_ISDIR(inode->i_mode);
		au_refresh_hinode_attr(inode, do_version);
	}

	AuTraceErr(err);
	return err;
}
/*
 * Refresh @inode using the lower dentries of @dentry.
 *
 * After au_ii_refresh(), every positive lower dentry of @dentry between
 * au_dbtop() and au_dbbot() is compared with the lower inode recorded at
 * the same index: an empty slot is filled with the inode of the lower
 * dentry, while a slot holding a different inode ends the loop with -EIO.
 *
 * Returns zero, the error of au_ii_refresh(), -EIO on such a mismatch, or
 * the error of au_dy_irefresh().
 */
int au_refresh_hinode(struct inode *inode, struct dentry *dentry)
{
int err, e, update;
unsigned int flags;
umode_t mode;
aufs_bindex_t bindex, bbot;
unsigned char isdir;
struct au_hinode *p;
struct au_iinfo *iinfo;
err = au_ii_refresh(inode, &update);
if (unlikely(err))
goto out;
/* from here on, update only tracks the slots filled by the loop below */
update = 0;
iinfo = au_ii(inode);
/* NOTE(review): p is not read again after this assignment in this function */
p = au_hinode(iinfo, iinfo->ii_btop);
mode = (inode->i_mode & S_IFMT);
isdir = S_ISDIR(mode);
flags = au_hi_flags(inode, isdir);
bbot = au_dbbot(dentry);
for (bindex = au_dbtop(dentry); bindex <= bbot; bindex++) {
struct inode *h_i, *h_inode;
struct dentry *h_d;
h_d = au_h_dptr(dentry, bindex);
if (!h_d || d_is_negative(h_d))
continue;
h_inode = d_inode(h_d);
AuDebugOn(mode != (h_inode->i_mode & S_IFMT));
if (iinfo->ii_btop <= bindex && bindex <= iinfo->ii_bbot) {
h_i = au_h_iptr(inode, bindex);
if (h_i) {
/* same lower inode: nothing to do for this index */
if (h_i == h_inode)
continue;
/* the slot and the dentry disagree */
err = -EIO;
break;
}
}
/* au_h_iptr() below range would be out of bounds; widen the range first */
if (bindex < iinfo->ii_btop)
iinfo->ii_btop = bindex;
if (iinfo->ii_bbot < bindex)
iinfo->ii_bbot = bindex;
au_set_h_iptr(inode, bindex, au_igrab(h_inode), flags);
update = 1;
}
/* executed on the -EIO path as well */
au_update_ibrange(inode, /*do_put_zero*/0);
e = au_dy_irefresh(inode);
if (unlikely(e && !err))
err = e;
if (!err)
au_refresh_hinode_attr(inode, update && isdir);
out:
AuTraceErr(err);
return err;
}
/*
 * Fill in a new aufs @inode from the lower dentries of @dentry.
 *
 * i_op (and i_fop for regular files and directories) is chosen by the file
 * type of the lower inode on the top branch, then the lower inodes of the
 * branches btop..btail are recorded and the attributes are copied up.
 *
 * Returns zero, the error of au_dy_iaop() for a regular file, or -EIO for
 * an unknown file type.
 */
static int set_inode(struct inode *inode, struct dentry *dentry)
{
int err;
unsigned int flags;
umode_t mode;
aufs_bindex_t bindex, btop, btail;
unsigned char isdir;
struct dentry *h_dentry;
struct inode *h_inode;
struct au_iinfo *iinfo;
const struct inode_operations *iop;
IiMustWriteLock(inode);
err = 0;
isdir = 0;
iop = au_sbi(inode->i_sb)->si_iop_array;
btop = au_dbtop(dentry);
h_dentry = au_h_dptr(dentry, btop);
h_inode = d_inode(h_dentry);
mode = h_inode->i_mode;
switch (mode & S_IFMT) {
case S_IFREG:
btail = au_dbtail(dentry);
inode->i_op = iop + AuIop_OTHER;
inode->i_fop = &aufs_file_fop;
err = au_dy_iaop(inode, btop, h_inode);
if (unlikely(err))
goto out;
break;
case S_IFDIR:
isdir = 1;
/* directories take their last index from au_dbtaildir() */
btail = au_dbtaildir(dentry);
inode->i_op = iop + AuIop_DIR;
inode->i_fop = &aufs_dir_fop;
break;
case S_IFLNK:
/* i_fop is not assigned here for symlinks */
btail = au_dbtail(dentry);
inode->i_op = iop + AuIop_SYMLINK;
break;
case S_IFBLK:
case S_IFCHR:
case S_IFIFO:
case S_IFSOCK:
btail = au_dbtail(dentry);
inode->i_op = iop + AuIop_OTHER;
init_special_inode(inode, mode, h_inode->i_rdev);
break;
default:
AuIOErr("Unknown file type 0%o\n", mode);
err = -EIO;
goto out;
}
/* do not set hnotify for whiteouted dirs (SHWH mode) */
flags = au_hi_flags(inode, isdir);
/* a "whiteouted" name is one longer than, and starting with, AUFS_WH_PFX */
if (au_opt_test(au_mntflags(dentry->d_sb), SHWH)
&& au_ftest_hi(flags, HNOTIFY)
&& dentry->d_name.len > AUFS_WH_PFX_LEN
&& !memcmp(dentry->d_name.name, AUFS_WH_PFX, AUFS_WH_PFX_LEN))
au_fclr_hi(flags, HNOTIFY);
iinfo = au_ii(inode);
iinfo->ii_btop = btop;
iinfo->ii_bbot = btail;
/* take a reference on every lower inode in the range and record it */
for (bindex = btop; bindex <= btail; bindex++) {
h_dentry = au_h_dptr(dentry, bindex);
if (h_dentry)
au_set_h_iptr(inode, bindex,
au_igrab(d_inode(h_dentry)), flags);
}
au_cpup_attr_all(inode, /*force*/1);
/*
* to force calling aufs_get_acl() every time,
* do not call cache_no_acl() for aufs inode.
*/
out:
return err;
}
/*
* Check whether the existing aufs @inode matches @dentry, i.e. whether one
* of its lower inodes is the lower inode of @dentry on the top branch, and
* refresh it when its generation is stale or only half refreshed.
*
* successful returns with iinfo write_locked
* minus: errno
* zero: success, matched
* plus: no error, but unmatched
*
* On any non-zero return taken after the lock was acquired, the iinfo lock
* is released again before returning.
*/
static int reval_inode(struct inode *inode, struct dentry *dentry)
{
int err;
unsigned int gen, igflags;
aufs_bindex_t bindex, bbot;
struct inode *h_inode, *h_dinode;
struct dentry *h_dentry;
/*
* before this function, if aufs got any iinfo lock, it must be only
* one, the parent dir.
* it can happen by UDBA and the obsoleted inode number.
*/
err = -EIO;
if (unlikely(inode->i_ino == parent_ino(dentry)))
goto out;
/* "unmatched" unless the loop below finds the lower inode */
err = 1;
ii_write_lock_new_child(inode);
h_dentry = au_h_dptr(dentry, au_dbtop(dentry));
h_dinode = d_inode(h_dentry);
bbot = au_ibbot(inode);
for (bindex = au_ibtop(inode); bindex <= bbot; bindex++) {
h_inode = au_h_iptr(inode, bindex);
if (!h_inode || h_inode != h_dinode)
continue;
/* matched; the loop ends here in either case */
err = 0;
gen = au_iigen(inode, &igflags);
/* up to date and fully refreshed: nothing more to do */
if (gen == au_digen(dentry)
&& !au_ig_ftest(igflags, HALF_REFRESHED))
break;
/* fully refresh inode using dentry */
err = au_refresh_hinode(inode, dentry);
if (!err)
au_update_iigen(inode, /*half*/0);
break;
}
if (unlikely(err))
ii_write_unlock(inode);
out:
return err;
}
/*
 * Look up the aufs inode number for the lower inode number @h_ino on branch
 * @bindex and store it in *@ino. When no number is recorded yet (zero), a
 * new one is obtained and written back.
 *
 * Returns zero or a negative errno; -EIO when no new inode number could be
 * obtained.
 */
int au_ino(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
	   unsigned int d_type, ino_t *ino)
{
	int err, idx;
	const int isnondir = d_type != DT_DIR;

	/* prevent hardlinked inode number from race condition */
	if (isnondir) {
		err = au_xinondir_enter(sb, bindex, h_ino, &idx);
		if (unlikely(err))
			return err;
	}

	err = au_xino_read(sb, bindex, h_ino, ino);
	if (!err && !*ino) {
		*ino = au_xino_new_ino(sb);
		if (*ino)
			err = au_xino_write(sb, bindex, h_ino, *ino);
		else
			err = -EIO;
	}

	if (isnondir && idx >= 0)
		au_xinondir_leave(sb, bindex, h_ino, idx);
	return err;
}
/*
* Get the aufs inode for @dentry, either a brand new one or, unless
* @must_new is set, an existing one that still matches @dentry.
*
* The inode number comes from au_xino_read() for the lower inode on the top
* branch, or from au_xino_new_ino() when none is recorded. When the inode
* found under that number cannot be used, the recorded number is reset to
* zero and the whole lookup is retried from new_ino.
*
* Returns the inode or an ERR_PTR().
*/
/* successful returns with iinfo write_locked */
/* todo: return with unlocked? */
struct inode *au_new_inode(struct dentry *dentry, int must_new)
{
struct inode *inode, *h_inode;
struct dentry *h_dentry;
struct super_block *sb;
ino_t h_ino, ino;
int err, idx, hlinked;
aufs_bindex_t btop;
sb = dentry->d_sb;
btop = au_dbtop(dentry);
h_dentry = au_h_dptr(dentry, btop);
h_inode = d_inode(h_dentry);
h_ino = h_inode->i_ino;
/* idx is only assigned, and only read, while hlinked is non-zero */
hlinked = !d_is_dir(h_dentry) && h_inode->i_nlink > 1;
new_ino:
/*
* stop 'race'-ing between hardlinks under different
* parents.
*/
if (hlinked) {
err = au_xinondir_enter(sb, btop, h_ino, &idx);
inode = ERR_PTR(err);
if (unlikely(err))
goto out;
}
err = au_xino_read(sb, btop, h_ino, &ino);
inode = ERR_PTR(err);
if (unlikely(err))
goto out_xinondir;
if (!ino) {
ino = au_xino_new_ino(sb);
if (unlikely(!ino)) {
inode = ERR_PTR(-EIO);
goto out_xinondir;
}
}
AuDbg("i%lu\n", (unsigned long)ino);
inode = au_iget_locked(sb, ino);
err = PTR_ERR(inode);
if (IS_ERR(inode))
goto out_xinondir;
AuDbg("%lx, new %d\n", inode->i_state, !!(inode->i_state & I_NEW));
if (inode->i_state & I_NEW) {
ii_write_lock_new_child(inode);
err = set_inode(inode, dentry);
if (!err) {
unlock_new_inode(inode);
/* jumps past the iput()/ERR_PTR() at out_iput, so inode is returned */
goto out_xinondir; /* success */
}
/*
* iget_failed() calls iput(), but we need to call
* ii_write_unlock() after iget_failed(). so dirty hack for
* i_count.
*/
atomic_inc(&inode->i_count);
iget_failed(inode);
ii_write_unlock(inode);
au_xino_write(sb, btop, h_ino, /*ino*/0);
/* ignore this error */
goto out_iput;
} else if (!must_new && !IS_DEADDIR(inode) && inode->i_nlink) {
/*
* horrible race condition between lookup, readdir and copyup
* (or something).
*/
if (hlinked && idx >= 0)
au_xinondir_leave(sb, btop, h_ino, idx);
err = reval_inode(inode, dentry);
if (unlikely(err < 0)) {
/* already left above; keep out_xinondir from leaving again */
hlinked = 0;
goto out_iput;
}
if (!err)
goto out; /* success */
else if (hlinked && idx >= 0) {
/* unmatched: enter again before resetting the number below */
err = au_xinondir_enter(sb, btop, h_ino, &idx);
if (unlikely(err)) {
iput(inode);
inode = ERR_PTR(err);
goto out;
}
}
}
if (unlikely(au_test_fs_unique_ino(h_inode)))
AuWarn1("Warning: Un-notified UDBA or repeatedly renamed dir,"
" b%d, %s, %pd, hi%lu, i%lu.\n",
btop, au_sbtype(h_dentry->d_sb), dentry,
(unsigned long)h_ino, (unsigned long)ino);
/* forget the recorded number and start over */
ino = 0;
err = au_xino_write(sb, btop, h_ino, /*ino*/0);
if (!err) {
iput(inode);
if (hlinked && idx >= 0)
au_xinondir_leave(sb, btop, h_ino, idx);
goto new_ino;
}
out_iput:
iput(inode);
inode = ERR_PTR(err);
out_xinondir:
if (hlinked && idx >= 0)
au_xinondir_leave(sb, btop, h_ino, idx);
out:
return inode;
}
/* ---------------------------------------------------------------------- */
/*
 * Test whether branch @bindex must be treated as read-only for @inode.
 *
 * Returns the non-zero result of au_br_rdonly() for the branch; otherwise
 * -EROFS when @inode has a lower inode at @bindex which is immutable, and
 * zero in every other case (including a NULL @inode).
 */
int au_test_ro(struct super_block *sb, aufs_bindex_t bindex,
	       struct inode *inode)
{
	int err;
	struct inode *hi;

	err = au_br_rdonly(au_sbr(sb, bindex));
	if (err || !inode)
		return err;

	/* pseudo-link after flushed may happen out of bounds */
	if (bindex < au_ibtop(inode) || au_ibbot(inode) < bindex)
		return err;

	/*
	 * permission check is unnecessary since vfsub routine
	 * will be called later
	 */
	hi = au_h_iptr(inode, bindex);
	if (hi && IS_IMMUTABLE(hi))
		err = -EROFS;
	return err;
}
/*
 * Permission check on the lower inode @h_inode for @mask. A task whose
 * fsuid is the global root passes without asking inode_permission().
 */
int au_test_h_perm(struct inode *h_inode, int mask)
{
	if (!uid_eq(current_fsuid(), GLOBAL_ROOT_UID))
		return inode_permission(h_inode, mask);
	return 0;
}
/*
 * Same as au_test_h_perm(), except that MAY_READ is added to @mask when
 * write access to a directory on NFS is being tested.
 */
int au_test_h_perm_sio(struct inode *h_inode, int mask)
{
	const int nfs_dir_write = au_test_nfs(h_inode->i_sb)
		&& (mask & MAY_WRITE)
		&& S_ISDIR(h_inode->i_mode);

	if (nfs_dir_write)
		mask |= MAY_READ; /* force permission check */
	return au_test_h_perm(h_inode, mask);
}

698
fs/aufs/inode.h Normal file
View File

@ -0,0 +1,698 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2005-2020 Junjiro R. Okajima
*
* This program, aufs is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* inode operations
*/
#ifndef __AUFS_INODE_H__
#define __AUFS_INODE_H__
#ifdef __KERNEL__
#include <linux/fsnotify.h>
#include "rwsem.h"
struct vfsmount;
/*
 * Notification state referenced by struct au_hinode::hi_notify.
 * The structure has no members unless CONFIG_AUFS_HNOTIFY is set, and the
 * fsnotify mark is present only with CONFIG_AUFS_HFSNOTIFY in addition.
 */
struct au_hnotify {
#ifdef CONFIG_AUFS_HNOTIFY
#ifdef CONFIG_AUFS_HFSNOTIFY
/* never use fsnotify_add_vfsmount_mark() */
struct fsnotify_mark hn_mark;
#endif
struct inode *hn_aufs_inode; /* no get/put */
struct rcu_head rcu;
#endif
} ____cacheline_aligned_in_smp;
/*
 * One entry of the lower inode array of an aufs inode
 * (struct au_iinfo::ii_hinode), indexed by branch index.
 */
struct au_hinode {
/* lower inode; stored by au_set_h_iptr(), released with iput() in au_hiput() */
struct inode *hi_inode;
/* br_id of the branch, copied in au_set_h_iptr(); -1 after au_hinode_init() */
aufs_bindex_t hi_id;
#ifdef CONFIG_AUFS_HNOTIFY
struct au_hnotify *hi_notify;
#endif
/* reference to the copied-up whiteout with get/put */
struct dentry *hi_whdentry;
};
/*
 * ig_flags: bits kept in struct au_iigen::ig_flags.
 * au_ig_ftest() yields the masked bit, au_ig_fset()/au_ig_fclr() modify
 * @flags in place; @name is the flag name without the AuIG_ prefix.
 */
#define AuIG_HALF_REFRESHED 1
#define au_ig_ftest(flags, name) ((flags) & AuIG_##name)
#define au_ig_fset(flags, name) \
do { (flags) |= AuIG_##name; } while (0)
#define au_ig_fclr(flags, name) \
do { (flags) &= ~AuIG_##name; } while (0)
/*
 * Generation of an aufs inode. au_update_iigen() writes ig_generation and
 * ig_flags while holding ig_spin.
 */
struct au_iigen {
spinlock_t ig_spin;
/* generation copied from the superblock, and AuIG_* flags */
__u32 ig_generation, ig_flags;
};
struct au_vdir;
/*
 * aufs private data of an inode, embedded in struct au_icntnr.
 */
struct au_iinfo {
struct au_iigen ii_generation;
struct super_block *ii_hsb1; /* no get/put */
/* the lock behind the ii_read_lock_xxx()/ii_write_lock_xxx() helpers */
struct au_rwsem ii_rwsem;
/*
 * first and last index of ii_hinode holding a lower inode, both -1 when
 * there is none (see au_update_ibrange())
 */
aufs_bindex_t ii_btop, ii_bbot;
__u32 ii_higen;
/* array of lower inodes, allocated in au_iinfo_init() */
struct au_hinode *ii_hinode;
/* freed by au_vdir_free() in au_iinfo_fin() when set */
struct au_vdir *ii_vdir;
};
/*
 * Container tying a VFS inode to its aufs private data; au_ii() goes from
 * the VFS inode back to iinfo with container_of().
 */
struct au_icntnr {
struct au_iinfo iinfo;
struct inode vfs_inode;
struct hlist_bl_node plink;
struct rcu_head rcu;
} ____cacheline_aligned_in_smp;
/*
 * au_pin flags: bits for struct au_pin::flags.
 * au_ftest_pin() yields the masked bit, au_fset_pin()/au_fclr_pin() modify
 * @flags in place; @name is the flag name without the AuPin_ prefix.
 */
#define AuPin_DI_LOCKED 1
#define AuPin_MNT_WRITE (1 << 1)
#define au_ftest_pin(flags, name) ((flags) & AuPin_##name)
#define au_fset_pin(flags, name) \
do { (flags) |= AuPin_##name; } while (0)
#define au_fclr_pin(flags, name) \
do { (flags) &= ~AuPin_##name; } while (0)
/*
 * Arguments and results of the au_pin()/au_do_pin()/au_unpin() family and
 * of the au_pin_hdir_xxx() helpers declared in this header.
 */
struct au_pin {
/* input */
struct dentry *dentry;
unsigned int udba;
/* flags holds AuPin_* bits */
unsigned char lsc_di, lsc_hi, flags;
aufs_bindex_t bindex;
/* output */
struct dentry *parent;
struct au_hinode *hdir;
struct vfsmount *h_mnt;
/* temporary unlock/relock for copyup */
struct dentry *h_dentry, *h_parent;
struct au_branch *br;
struct task_struct *task;
};
void au_pin_hdir_unlock(struct au_pin *p);
int au_pin_hdir_lock(struct au_pin *p);
int au_pin_hdir_relock(struct au_pin *p);
void au_pin_hdir_acquire_nest(struct au_pin *p);
void au_pin_hdir_release(struct au_pin *p);
/* ---------------------------------------------------------------------- */
static inline struct au_iinfo *au_ii(struct inode *inode)
{
BUG_ON(is_bad_inode(inode));
return &(container_of(inode, struct au_icntnr, vfs_inode)->iinfo);
}
/* ---------------------------------------------------------------------- */
/* inode.c */
struct inode *au_igrab(struct inode *inode);
void au_refresh_iop(struct inode *inode, int force_getattr);
int au_refresh_hinode_self(struct inode *inode);
int au_refresh_hinode(struct inode *inode, struct dentry *dentry);
int au_ino(struct super_block *sb, aufs_bindex_t bindex, ino_t h_ino,
unsigned int d_type, ino_t *ino);
struct inode *au_new_inode(struct dentry *dentry, int must_new);
int au_test_ro(struct super_block *sb, aufs_bindex_t bindex,
struct inode *inode);
int au_test_h_perm(struct inode *h_inode, int mask);
int au_test_h_perm_sio(struct inode *h_inode, int mask);
/*
 * Inode number lookup for a whiteout: the same as au_ino() when
 * CONFIG_AUFS_SHWH is set. Without it, zero is returned and *@ino is left
 * untouched.
 */
static inline int au_wh_ino(struct super_block *sb, aufs_bindex_t bindex,
ino_t h_ino, unsigned int d_type, ino_t *ino)
{
#ifdef CONFIG_AUFS_SHWH
return au_ino(sb, bindex, h_ino, d_type, ino);
#else
return 0;
#endif
}
/* i_op.c */
/*
 * Index into an inode_operations array such as aufs_iop[], one entry per
 * kind of inode; AuIop_Last is the array size.
 */
enum {
AuIop_SYMLINK,
AuIop_DIR,
AuIop_OTHER,
AuIop_Last
};
extern struct inode_operations aufs_iop[AuIop_Last], /* not const */
aufs_iop_nogetattr[AuIop_Last];
/*
 * au_wr_dir flags: bits for struct au_wr_dir_args::flags.
 * au_ftest_wrdir() yields the masked bit, au_fset_wrdir()/au_fclr_wrdir()
 * modify @flags in place; @name is the flag name without the AuWrDir_
 * prefix.
 */
#define AuWrDir_ADD_ENTRY 1
#define AuWrDir_ISDIR (1 << 1)
#define AuWrDir_TMPFILE (1 << 2)
#define au_ftest_wrdir(flags, name) ((flags) & AuWrDir_##name)
#define au_fset_wrdir(flags, name) \
do { (flags) |= AuWrDir_##name; } while (0)
#define au_fclr_wrdir(flags, name) \
do { (flags) &= ~AuWrDir_##name; } while (0)
/* arguments of au_wr_dir() */
struct au_wr_dir_args {
/* NOTE(review): presumably a branch index forced as the target; confirm in au_wr_dir() */
aufs_bindex_t force_btgt;
/* AuWrDir_* bits */
unsigned char flags;
};
int au_wr_dir(struct dentry *dentry, struct dentry *src_dentry,
struct au_wr_dir_args *args);
struct dentry *au_pinned_h_parent(struct au_pin *pin);
void au_pin_init(struct au_pin *pin, struct dentry *dentry,
aufs_bindex_t bindex, int lsc_di, int lsc_hi,
unsigned int udba, unsigned char flags);
int au_pin(struct au_pin *pin, struct dentry *dentry, aufs_bindex_t bindex,
unsigned int udba, unsigned char flags) __must_check;
int au_do_pin(struct au_pin *pin) __must_check;
void au_unpin(struct au_pin *pin);
int au_reval_for_attr(struct dentry *dentry, unsigned int sigen);
/*
 * Bits for struct au_icpup_args::flags.
 * au_ftest_icpup() yields the masked bit, au_fset_icpup()/au_fclr_icpup()
 * modify @flags in place; @name is the flag name without the AuIcpup_
 * prefix.
 */
#define AuIcpup_DID_CPUP 1
#define au_ftest_icpup(flags, name) ((flags) & AuIcpup_##name)
#define au_fset_icpup(flags, name) \
do { (flags) |= AuIcpup_##name; } while (0)
#define au_fclr_icpup(flags, name) \
do { (flags) &= ~AuIcpup_##name; } while (0)
/* arguments of au_pin_and_icpup() */
struct au_icpup_args {
/* AuIcpup_* bits */
unsigned char flags;
/* NOTE(review): presumably AuPin_* bits handed to the pin; confirm in au_pin_and_icpup() */
unsigned char pin_flags;
aufs_bindex_t btgt;
unsigned int udba;
struct au_pin pin;
struct path h_path;
struct inode *h_inode;
};
int au_pin_and_icpup(struct dentry *dentry, struct iattr *ia,
struct au_icpup_args *a);
int au_h_path_getattr(struct dentry *dentry, struct inode *inode, int force,
struct path *h_path, int locked);
/* i_op_add.c */
int au_may_add(struct dentry *dentry, aufs_bindex_t bindex,
struct dentry *h_parent, int isdir);
int aufs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
dev_t dev);
int aufs_symlink(struct inode *dir, struct dentry *dentry, const char *symname);
int aufs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
bool want_excl);
struct vfsub_aopen_args;
int au_aopen_or_create(struct inode *dir, struct dentry *dentry,
struct vfsub_aopen_args *args);
int aufs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode);
int aufs_link(struct dentry *src_dentry, struct inode *dir,
struct dentry *dentry);
int aufs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode);
/* i_op_del.c */
int au_wr_dir_need_wh(struct dentry *dentry, int isdir, aufs_bindex_t *bcpup);
int au_may_del(struct dentry *dentry, aufs_bindex_t bindex,
struct dentry *h_parent, int isdir);
int aufs_unlink(struct inode *dir, struct dentry *dentry);
int aufs_rmdir(struct inode *dir, struct dentry *dentry);
/* i_op_ren.c */
int au_wbr(struct dentry *dentry, aufs_bindex_t btgt);
int aufs_rename(struct inode *src_dir, struct dentry *src_dentry,
struct inode *dir, struct dentry *dentry,
unsigned int flags);
/* iinfo.c */
struct inode *au_h_iptr(struct inode *inode, aufs_bindex_t bindex);
void au_hiput(struct au_hinode *hinode);
void au_set_hi_wh(struct inode *inode, aufs_bindex_t bindex,
struct dentry *h_wh);
unsigned int au_hi_flags(struct inode *inode, int isdir);
/*
 * hinode flags: the @flags argument of au_set_h_iptr(), built by
 * au_hi_flags().
 * au_ftest_hi() yields the masked bit, au_fset_hi()/au_fclr_hi() modify
 * @flags in place; @name is the flag name without the AuHi_ prefix.
 */
#define AuHi_XINO 1
#define AuHi_HNOTIFY (1 << 1)
#define au_ftest_hi(flags, name) ((flags) & AuHi_##name)
#define au_fset_hi(flags, name) \
do { (flags) |= AuHi_##name; } while (0)
#define au_fclr_hi(flags, name) \
do { (flags) &= ~AuHi_##name; } while (0)
/*
 * without CONFIG_AUFS_HNOTIFY the HNOTIFY bit is zero, so testing it always
 * yields zero and setting it changes nothing
 */
#ifndef CONFIG_AUFS_HNOTIFY
#undef AuHi_HNOTIFY
#define AuHi_HNOTIFY 0
#endif
void au_set_h_iptr(struct inode *inode, aufs_bindex_t bindex,
struct inode *h_inode, unsigned int flags);
void au_update_iigen(struct inode *inode, int half);
void au_update_ibrange(struct inode *inode, int do_put_zero);
void au_icntnr_init_once(void *_c);
void au_hinode_init(struct au_hinode *hinode);
int au_iinfo_init(struct inode *inode);
void au_iinfo_fin(struct inode *inode);
int au_hinode_realloc(struct au_iinfo *iinfo, int nbr, int may_shrink);
#ifdef CONFIG_PROC_FS
/* plink.c */
int au_plink_maint(struct super_block *sb, int flags);
struct au_sbinfo;
void au_plink_maint_leave(struct au_sbinfo *sbinfo);
int au_plink_maint_enter(struct super_block *sb);
#ifdef CONFIG_AUFS_DEBUG
void au_plink_list(struct super_block *sb);
#else
AuStubVoid(au_plink_list, struct super_block *sb)
#endif
int au_plink_test(struct inode *inode);
struct dentry *au_plink_lkup(struct inode *inode, aufs_bindex_t bindex);
void au_plink_append(struct inode *inode, aufs_bindex_t bindex,
struct dentry *h_dentry);
void au_plink_put(struct super_block *sb, int verbose);
void au_plink_clean(struct super_block *sb, int verbose);
void au_plink_half_refresh(struct super_block *sb, aufs_bindex_t br_id);
#else
AuStubInt0(au_plink_maint, struct super_block *sb, int flags);
AuStubVoid(au_plink_maint_leave, struct au_sbinfo *sbinfo);
AuStubInt0(au_plink_maint_enter, struct super_block *sb);
AuStubVoid(au_plink_list, struct super_block *sb);
AuStubInt0(au_plink_test, struct inode *inode);
AuStub(struct dentry *, au_plink_lkup, return NULL,
struct inode *inode, aufs_bindex_t bindex);
AuStubVoid(au_plink_append, struct inode *inode, aufs_bindex_t bindex,
struct dentry *h_dentry);
AuStubVoid(au_plink_put, struct super_block *sb, int verbose);
AuStubVoid(au_plink_clean, struct super_block *sb, int verbose);
AuStubVoid(au_plink_half_refresh, struct super_block *sb, aufs_bindex_t br_id);
#endif /* CONFIG_PROC_FS */
/*
 * Extended attribute support.  The real functions live in xattr.c and are
 * declared only with CONFIG_AUFS_XATTR; otherwise au_cpup_xattr() and
 * au_xattr_init() become AuStub*() stand-ins (aufs_listxattr() has no
 * stand-in here).
 */
#ifdef CONFIG_AUFS_XATTR
/* xattr.c */
int au_cpup_xattr(struct dentry *h_dst, struct dentry *h_src, int ignore_flags,
unsigned int verbose);
ssize_t aufs_listxattr(struct dentry *dentry, char *list, size_t size);
void au_xattr_init(struct super_block *sb);
#else
AuStubInt0(au_cpup_xattr, struct dentry *h_dst, struct dentry *h_src,
int ignore_flags, unsigned int verbose);
AuStubVoid(au_xattr_init, struct super_block *sb);
#endif
/* POSIX ACL entry points, declared only when the kernel supports ACLs */
#ifdef CONFIG_FS_POSIX_ACL
struct posix_acl *aufs_get_acl(struct inode *inode, int type);
int aufs_set_acl(struct inode *inode, struct posix_acl *acl, int type);
#endif
/*
 * Argument bundle shared by the xattr "set" and ACL "set" paths; compiled in
 * when either feature is enabled.
 */
#if IS_ENABLED(CONFIG_AUFS_XATTR) || IS_ENABLED(CONFIG_FS_POSIX_ACL)
/* request kinds; presumably the values stored in au_sxattr.type -- confirm */
enum {
AU_XATTR_SET,
AU_ACL_SET
};
struct au_sxattr {
/* presumably selects which member of @u is valid -- confirm against callers */
int type;
union {
/* parameters of an xattr set request */
struct {
const char *name;
const void *value;
size_t size;
int flags;
} set;
/* parameters of an ACL set request */
struct {
struct posix_acl *acl;
int type;
} acl_set;
} u;
};
ssize_t au_sxattr(struct dentry *dentry, struct inode *inode,
struct au_sxattr *arg);
#endif
/* ---------------------------------------------------------------------- */
/*
 * lock subclass for iinfo
 * Each value is passed as the subclass argument of
 * au_rw_read_lock_nested()/au_rw_write_lock_nested() on ii_rwsem by the
 * generated ii_read_lock_*()/ii_write_lock_*() helpers.
 */
enum {
AuLsc_II_CHILD, /* child first */
AuLsc_II_CHILD2, /* rename(2), link(2), and cpup at hnotify */
AuLsc_II_CHILD3, /* copyup dirs */
AuLsc_II_PARENT, /* see AuLsc_I_PARENT in vfsub.h */
AuLsc_II_PARENT2,
AuLsc_II_PARENT3, /* copyup dirs */
AuLsc_II_NEW_CHILD
};
/*
 * ii_read_lock_child, ii_write_lock_child,
 * ii_read_lock_child2, ii_write_lock_child2,
 * ii_read_lock_child3, ii_write_lock_child3,
 * ii_read_lock_parent, ii_write_lock_parent,
 * ii_read_lock_parent2, ii_write_lock_parent2,
 * ii_read_lock_parent3, ii_write_lock_parent3,
 * ii_read_lock_new_child, ii_write_lock_new_child,
 */
/*
 * Generator macros for the functions listed above.
 * AuReadLockFunc(name, lsc) defines ii_read_lock_<name>(), which takes the
 * inode's ii_rwsem for reading with subclass AuLsc_II_<lsc>;
 * AuWriteLockFunc() does the same for writing; AuRWLockFuncs() emits both.
 * The three generator macros are #undef'ed once all helpers are defined.
 */
#define AuReadLockFunc(name, lsc) \
static inline void ii_read_lock_##name(struct inode *i) \
{ \
au_rw_read_lock_nested(&au_ii(i)->ii_rwsem, AuLsc_II_##lsc); \
}
#define AuWriteLockFunc(name, lsc) \
static inline void ii_write_lock_##name(struct inode *i) \
{ \
au_rw_write_lock_nested(&au_ii(i)->ii_rwsem, AuLsc_II_##lsc); \
}
#define AuRWLockFuncs(name, lsc) \
AuReadLockFunc(name, lsc) \
AuWriteLockFunc(name, lsc)
AuRWLockFuncs(child, CHILD);
AuRWLockFuncs(child2, CHILD2);
AuRWLockFuncs(child3, CHILD3);
AuRWLockFuncs(parent, PARENT);
AuRWLockFuncs(parent2, PARENT2);
AuRWLockFuncs(parent3, PARENT3);
AuRWLockFuncs(new_child, NEW_CHILD);
#undef AuReadLockFunc
#undef AuWriteLockFunc
#undef AuRWLockFuncs
/*
 * Unlock / downgrade helpers and lock-state checks for an inode's ii_rwsem.
 * Each one simply forwards &au_ii(i)->ii_rwsem to the au_rw / AuRw
 * counterpart of the same name.
 */
#define ii_read_unlock(i) au_rw_read_unlock(&au_ii(i)->ii_rwsem)
#define ii_write_unlock(i) au_rw_write_unlock(&au_ii(i)->ii_rwsem)
#define ii_downgrade_lock(i) au_rw_dgrade_lock(&au_ii(i)->ii_rwsem)
#define IiMustNoWaiters(i) AuRwMustNoWaiters(&au_ii(i)->ii_rwsem)
#define IiMustAnyLock(i) AuRwMustAnyLock(&au_ii(i)->ii_rwsem)
#define IiMustWriteLock(i) AuRwMustWriteLock(&au_ii(i)->ii_rwsem)
/* ---------------------------------------------------------------------- */
/*
 * Initialize an inode container.  Only debug builds (CONFIG_AUFS_DEBUG) do
 * anything: they clear the embedded VFS inode's i_mode.  Otherwise this is
 * an empty function.
 */
static inline void au_icntnr_init(struct au_icntnr *c)
{
#ifdef CONFIG_AUFS_DEBUG
c->vfs_inode.i_mode = 0;
#endif
}
static inline unsigned int au_iigen(struct inode *inode, unsigned int *igflags)
{
unsigned int gen;
struct au_iinfo *iinfo;
struct au_iigen *iigen;
iinfo = au_ii(inode);
iigen = &iinfo->ii_generation;
spin_lock(&iigen->ig_spin);
if (igflags)
*igflags = iigen->ig_flags;
gen = iigen->ig_generation;
spin_unlock(&iigen->ig_spin);
return gen;
}
/*
 * tiny test for inode number
 * tmpfs generation is too rough
 *
 * Returns 0 when the lower inode still has the superblock and generation
 * recorded in the iinfo, non-zero otherwise.  The caller must hold ii_rwsem.
 */
static inline int au_test_higen(struct inode *inode, struct inode *h_inode)
{
	struct au_iinfo *ii = au_ii(inode);

	AuRwMustAnyLock(&ii->ii_rwsem);
	return ii->ii_hsb1 != h_inode->i_sb
		|| ii->ii_higen != h_inode->i_generation;
}
static inline void au_iigen_dec(struct inode *inode)
{
struct au_iinfo *iinfo;
struct au_iigen *iigen;
iinfo = au_ii(inode);
iigen = &iinfo->ii_generation;
spin_lock(&iigen->ig_spin);
iigen->ig_generation--;
spin_unlock(&iigen->ig_spin);
}
/*
 * Compare @inode's generation against @sigen.
 * Returns -EIO on mismatch, 0 when they match or when @inode is NULL.
 */
static inline int au_iigen_test(struct inode *inode, unsigned int sigen)
{
	if (unlikely(inode && au_iigen(inode, NULL) != sigen))
		return -EIO;
	return 0;
}
/* ---------------------------------------------------------------------- */
/* Address of the @bindex-th element of @iinfo's ii_hinode array. */
static inline struct au_hinode *au_hinode(struct au_iinfo *iinfo,
					  aufs_bindex_t bindex)
{
	return &iinfo->ii_hinode[bindex];
}
/*
 * Returns 1 when the VFS marks @inode as bad or when its iinfo has no
 * ii_hinode array, 0 otherwise.
 */
static inline int au_is_bad_inode(struct inode *inode)
{
	if (is_bad_inode(inode))
		return 1;
	return !au_hinode(au_ii(inode), 0);
}
/* hi_id of @inode's lower inode slot @bindex; ii_rwsem must be held. */
static inline aufs_bindex_t au_ii_br_id(struct inode *inode,
					aufs_bindex_t bindex)
{
	struct au_hinode *hinode;

	IiMustAnyLock(inode);
	hinode = au_hinode(au_ii(inode), bindex);
	return hinode->hi_id;
}
static inline aufs_bindex_t au_ibtop(struct inode *inode)
{
IiMustAnyLock(inode);
return au_ii(inode)->ii_btop;
}
static inline aufs_bindex_t au_ibbot(struct inode *inode)
{
IiMustAnyLock(inode);
return au_ii(inode)->ii_bbot;
}
static inline struct au_vdir *au_ivdir(struct inode *inode)
{
IiMustAnyLock(inode);
return au_ii(inode)->ii_vdir;
}
/* hi_whdentry of @inode's lower inode slot @bindex; ii_rwsem must be held. */
static inline struct dentry *au_hi_wh(struct inode *inode, aufs_bindex_t bindex)
{
	struct au_hinode *hinode;

	IiMustAnyLock(inode);
	hinode = au_hinode(au_ii(inode), bindex);
	return hinode->hi_whdentry;
}
/* Store @bindex into ii_btop; ii_rwsem must be write-locked. */
static inline void au_set_ibtop(struct inode *inode, aufs_bindex_t bindex)
{
	struct au_iinfo *ii;

	IiMustWriteLock(inode);
	ii = au_ii(inode);
	ii->ii_btop = bindex;
}
/* Store @bindex into ii_bbot; ii_rwsem must be write-locked. */
static inline void au_set_ibbot(struct inode *inode, aufs_bindex_t bindex)
{
	struct au_iinfo *ii;

	IiMustWriteLock(inode);
	ii = au_ii(inode);
	ii->ii_bbot = bindex;
}
static inline void au_set_ivdir(struct inode *inode, struct au_vdir *vdir)
{
IiMustWriteLock(inode);
au_ii(inode)->ii_vdir = vdir;
}
/* Lock-checked variant of au_hinode(): ii_rwsem must be held. */
static inline struct au_hinode *au_hi(struct inode *inode, aufs_bindex_t bindex)
{
	struct au_iinfo *ii;

	IiMustAnyLock(inode);
	ii = au_ii(inode);
	return au_hinode(ii, bindex);
}
/* ---------------------------------------------------------------------- */
/* Parent dentry recorded in @pin, or NULL when @pin is NULL. */
static inline struct dentry *au_pinned_parent(struct au_pin *pin)
{
	return pin ? pin->parent : NULL;
}
/*
 * hi_inode of the hdir recorded in @pin, or NULL when @pin or its hdir is
 * NULL.
 */
static inline struct inode *au_pinned_h_dir(struct au_pin *pin)
{
	if (!pin || !pin->hdir)
		return NULL;
	return pin->hdir->hi_inode;
}
/* hdir recorded in @pin, or NULL when @pin is NULL. */
static inline struct au_hinode *au_pinned_hdir(struct au_pin *pin)
{
	return pin ? pin->hdir : NULL;
}
/* Record @dentry in @pin; a NULL @pin is silently ignored. */
static inline void au_pin_set_dentry(struct au_pin *pin, struct dentry *dentry)
{
	if (!pin)
		return;
	pin->dentry = dentry;
}
/*
 * Set (@lflag non-zero) or clear (@lflag zero) the DI_LOCKED pin flag.
 * A NULL @pin is silently ignored.
 */
static inline void au_pin_set_parent_lflag(struct au_pin *pin,
					   unsigned char lflag)
{
	if (!pin)
		return;

	if (lflag)
		au_fset_pin(pin->flags, DI_LOCKED);
	else
		au_fclr_pin(pin->flags, DI_LOCKED);
}
#if 0 /* reserved */
static inline void au_pin_set_parent(struct au_pin *pin, struct dentry *parent)
{
if (pin) {
dput(pin->parent);
pin->parent = dget(parent);
}
}
#endif
/* ---------------------------------------------------------------------- */
struct au_branch;
#ifdef CONFIG_AUFS_HNOTIFY
/*
 * Operations table of an hnotify backend.  A single instance named
 * au_hnotify_op is declared for hfsnotify.c further down.
 */
struct au_hnotify_op {
void (*ctl)(struct au_hinode *hinode, int do_set);
int (*alloc)(struct au_hinode *hinode);
/*
 * if it returns true, the caller should free hinode->hi_notify,
 * otherwise ->free() frees it.
 */
int (*free)(struct au_hinode *hinode,
struct au_hnotify *hn) __must_check;
/* module-wide teardown / setup */
void (*fin)(void);
int (*init)(void);
/* per-branch hooks */
int (*reset_br)(unsigned int udba, struct au_branch *br, int perm);
void (*fin_br)(struct au_branch *br);
int (*init_br)(struct au_branch *br, int perm);
};
/*
 * hnotify.c
 * Declared only with CONFIG_AUFS_HNOTIFY; the #else branch of this section
 * supplies AuStub*() stand-ins for the same names.
 */
int au_hn_alloc(struct au_hinode *hinode, struct inode *inode);
void au_hn_free(struct au_hinode *hinode);
void au_hn_ctl(struct au_hinode *hinode, int do_set);
void au_hn_reset(struct inode *inode, unsigned int flags);
int au_hnotify(struct inode *h_dir, struct au_hnotify *hnotify, u32 mask,
const struct qstr *h_child_qstr, struct inode *h_child_inode);
int au_hnotify_reset_br(unsigned int udba, struct au_branch *br, int perm);
int au_hnotify_init_br(struct au_branch *br, int perm);
void au_hnotify_fin_br(struct au_branch *br);
int __init au_hnotify_init(void);
void au_hnotify_fin(void);
/* hfsnotify.c */
extern const struct au_hnotify_op au_hnotify_op;
/* Mark @hinode as having no notify object attached. */
static inline void au_hn_init(struct au_hinode *hinode)
{
	hinode->hi_notify = NULL;
}
/* Notify object currently attached to @hinode (may be NULL). */
static inline struct au_hnotify *au_hn(struct au_hinode *hinode)
{
	struct au_hnotify *hn = hinode->hi_notify;

	return hn;
}
#else
/*
 * !CONFIG_AUFS_HNOTIFY: stand-ins for the hnotify interface.
 * au_hn_alloc() reports -EOPNOTSUPP and au_hn() yields NULL, as spelled out
 * in their AuStub() arguments; the AuStubInt0/AuStubVoid variants presumably
 * return 0 / nothing (macro definitions are outside this section).
 */
AuStub(int, au_hn_alloc, return -EOPNOTSUPP,
struct au_hinode *hinode __maybe_unused,
struct inode *inode __maybe_unused)
AuStub(struct au_hnotify *, au_hn, return NULL, struct au_hinode *hinode)
AuStubVoid(au_hn_free, struct au_hinode *hinode __maybe_unused)
AuStubVoid(au_hn_ctl, struct au_hinode *hinode __maybe_unused,
int do_set __maybe_unused)
AuStubVoid(au_hn_reset, struct inode *inode __maybe_unused,
unsigned int flags __maybe_unused)
AuStubInt0(au_hnotify_reset_br, unsigned int udba __maybe_unused,
struct au_branch *br __maybe_unused,
int perm __maybe_unused)
AuStubInt0(au_hnotify_init_br, struct au_branch *br __maybe_unused,
int perm __maybe_unused)
AuStubVoid(au_hnotify_fin_br, struct au_branch *br __maybe_unused)
AuStubInt0(__init au_hnotify_init, void)
AuStubVoid(au_hnotify_fin, void)
AuStubVoid(au_hn_init, struct au_hinode *hinode __maybe_unused)
#endif /* CONFIG_AUFS_HNOTIFY */
/* Call au_hn_ctl() on @hdir with do_set == 0. */
static inline void au_hn_suspend(struct au_hinode *hdir)
{
	const int do_set = 0;

	au_hn_ctl(hdir, do_set);
}
/* Call au_hn_ctl() on @hdir with do_set == 1. */
static inline void au_hn_resume(struct au_hinode *hdir)
{
	const int do_set = 1;

	au_hn_ctl(hdir, do_set);
}
/* Lock the lower directory inode of @hdir, then suspend its hnotify. */
static inline void au_hn_inode_lock(struct au_hinode *hdir)
{
	struct inode *h_dir = hdir->hi_inode;

	inode_lock(h_dir);
	au_hn_suspend(hdir);
}
/*
 * Same as au_hn_inode_lock(), but takes the inode lock with lock subclass
 * @sc.
 */
static inline void au_hn_inode_lock_nested(struct au_hinode *hdir,
					   unsigned int sc __maybe_unused)
{
	struct inode *h_dir = hdir->hi_inode;

	inode_lock_nested(h_dir, sc);
	au_hn_suspend(hdir);
}
#if 0 /* unused */
#include "vfsub.h"
static inline void au_hn_inode_lock_shared_nested(struct au_hinode *hdir,
unsigned int sc)
{
inode_lock_shared_nested(hdir->hi_inode, sc);
au_hn_suspend(hdir);
}
#endif
/*
 * Counterpart of au_hn_inode_lock(): resume hnotify first, then drop the
 * lower directory inode lock.
 */
static inline void au_hn_inode_unlock(struct au_hinode *hdir)
{
	struct inode *h_dir = hdir->hi_inode;

	au_hn_resume(hdir);
	inode_unlock(h_dir);
}
#endif /* __KERNEL__ */
#endif /* __AUFS_INODE_H__ */

220
fs/aufs/ioctl.c Normal file
View File

@ -0,0 +1,220 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2005-2020 Junjiro R. Okajima
*
* This program, aufs is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* ioctl
* plink-management and readdir in userspace.
* assist the pathconf(3) wrapper library.
* move-down
* File-based Hierarchical Storage Management.
*/
#include <linux/compat.h>
#include <linux/file.h>
#include "aufs.h"
/*
 * AUFS_CTL_WBR_FD: open a branch root and hand it to the caller as a new
 * file descriptor.
 *
 * @path: any path inside the aufs mount; only its superblock is used.
 * @arg:  optional user pointer to struct aufs_wbr_fd.  When NULL, the
 *        defaults are oflags = au_dir_roflags and brid = -1.
 *
 * With brid >= 0 the branch having that id is used, otherwise branch index 0.
 * If that branch is not writable: with @arg the call fails with -ENOENT;
 * without @arg the following branches are searched for the first writable
 * one.
 *
 * Returns the new fd (>= 0) or a negative errno: -EFAULT when the user copy
 * fails, -EINVAL for unsupported open flags or an unknown branch id, -ENOENT
 * when no suitable writable branch exists, or whatever
 * get_unused_fd_flags()/au_h_open() report.
 */
static int au_wbr_fd(struct path *path, struct aufs_wbr_fd __user *arg)
{
int err, fd;
aufs_bindex_t wbi, bindex, bbot;
struct file *h_file;
struct super_block *sb;
struct dentry *root;
struct au_branch *br;
struct aufs_wbr_fd wbrfd = {
.oflags = au_dir_roflags,
.brid = -1
};
/* the only open flags a caller may end up with */
const int valid = O_RDONLY | O_NONBLOCK | O_LARGEFILE | O_DIRECTORY
| O_NOATIME | O_CLOEXEC;
AuDebugOn(wbrfd.oflags & ~valid);
if (arg) {
err = copy_from_user(&wbrfd, arg, sizeof(wbrfd));
if (unlikely(err)) {
err = -EFAULT;
goto out;
}
err = -EINVAL;
AuDbg("wbrfd{0%o, %d}\n", wbrfd.oflags, wbrfd.brid);
/* the default directory read-only flags are always forced on */
wbrfd.oflags |= au_dir_roflags;
AuDbg("0%o\n", wbrfd.oflags);
if (unlikely(wbrfd.oflags & ~valid))
goto out;
}
/*
 * NOTE(review): O_CLOEXEC is accepted in 'valid' but the descriptor is
 * reserved with flags 0, so close-on-exec is not applied to the fd itself
 * -- confirm this is intended.
 */
fd = get_unused_fd_flags(0);
err = fd;
if (unlikely(fd < 0))
goto out;
/* from here on, failures are carried in h_file as ERR_PTR() */
h_file = ERR_PTR(-EINVAL);
wbi = 0;
br = NULL;
sb = path->dentry->d_sb;
root = sb->s_root;
aufs_read_lock(root, AuLock_IR);
bbot = au_sbbot(sb);
if (wbrfd.brid >= 0) {
/* translate the branch id into a branch index */
wbi = au_br_index(sb, wbrfd.brid);
if (unlikely(wbi < 0 || wbi > bbot))
goto out_unlock;
}
h_file = ERR_PTR(-ENOENT);
br = au_sbr(sb, wbi);
if (!au_br_writable(br->br_perm)) {
/* an explicitly supplied argument gets no fallback search */
if (arg)
goto out_unlock;
/* look for the first writable branch after wbi */
bindex = wbi + 1;
wbi = -1;
for (; bindex <= bbot; bindex++) {
br = au_sbr(sb, bindex);
if (au_br_writable(br->br_perm)) {
wbi = bindex;
br = au_sbr(sb, wbi);
break;
}
}
}
AuDbg("wbi %d\n", wbi);
/* wbi < 0 means no writable branch was found; h_file stays -ENOENT */
if (wbi >= 0)
h_file = au_h_open(root, wbi, wbrfd.oflags, NULL,
/*force_wr*/0);
out_unlock:
aufs_read_unlock(root, AuLock_IR);
err = PTR_ERR(h_file);
if (IS_ERR(h_file))
goto out_fd;
au_lcnt_dec(&br->br_nfiles); /* cf. au_h_open() */
fd_install(fd, h_file);
err = fd;
goto out; /* success */
out_fd:
/* give the reserved descriptor back */
put_unused_fd(fd);
out:
AuTraceErr(err);
return err;
}
/* ---------------------------------------------------------------------- */
/*
 * ioctl entry point for aufs directories.
 * Dispatches the aufs control commands; AUFS_CTL_FHSM_FD is honoured on the
 * root dentry only.  Everything else yields -ENOTTY and is never forwarded
 * to the lower filesystem.
 */
long aufs_ioctl_dir(struct file *file, unsigned int cmd, unsigned long arg)
{
	long err = -ENOTTY;
	struct dentry *dentry;

	switch (cmd) {
	case AUFS_CTL_RDU:
	case AUFS_CTL_RDU_INO:
		err = au_rdu_ioctl(file, cmd, arg);
		break;

	case AUFS_CTL_WBR_FD:
		err = au_wbr_fd(&file->f_path, (void __user *)arg);
		break;

	case AUFS_CTL_IBUSY:
		err = au_ibusy_ioctl(file, arg);
		break;

	case AUFS_CTL_BRINFO:
		err = au_brinfo_ioctl(file, arg);
		break;

	case AUFS_CTL_FHSM_FD:
		dentry = file->f_path.dentry;
		if (IS_ROOT(dentry))
			err = au_fhsm_fd(dentry->d_sb, arg);
		break;

	default:
		/* do not call the lower */
		AuDbg("0x%x\n", cmd);
		break;
	}

	AuTraceErr(err);
	return err;
}
long aufs_ioctl_nondir(struct file *file, unsigned int cmd, unsigned long arg)
{
long err;
switch (cmd) {
case AUFS_CTL_MVDOWN:
err = au_mvdown(file->f_path.dentry, (void __user *)arg);
break;
case AUFS_CTL_WBR_FD:
err = au_wbr_fd(&file->f_path, (void __user *)arg);
break;
default:
/* do not call the lower */
AuDbg("0x%x\n", cmd);
err = -ENOTTY;
}
AuTraceErr(err);
return err;
}
#ifdef CONFIG_COMPAT
/*
 * compat ioctl entry point for aufs directories.
 * RDU, IBUSY and BRINFO have dedicated compat handlers; every other command
 * goes through the native aufs_ioctl_dir().
 */
long aufs_compat_ioctl_dir(struct file *file, unsigned int cmd,
			   unsigned long arg)
{
	long err;

	if (cmd == AUFS_CTL_RDU || cmd == AUFS_CTL_RDU_INO)
		err = au_rdu_compat_ioctl(file, cmd, arg);
	else if (cmd == AUFS_CTL_IBUSY)
		err = au_ibusy_compat_ioctl(file, arg);
	else if (cmd == AUFS_CTL_BRINFO)
		err = au_brinfo_compat_ioctl(file, arg);
	else
		err = aufs_ioctl_dir(file, cmd, arg);

	AuTraceErr(err);
	return err;
}
/*
 * compat ioctl entry point for aufs non-directory files: convert the compat
 * pointer and reuse the native handler.
 */
long aufs_compat_ioctl_nondir(struct file *file, unsigned int cmd,
			      unsigned long arg)
{
	unsigned long native_arg = (unsigned long)compat_ptr(arg);

	return aufs_ioctl_nondir(file, cmd, native_arg);
}
#endif

186
fs/aufs/lcnt.h Normal file
View File

@ -0,0 +1,186 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2018-2020 Junjiro R. Okajima
*
* This program, aufs is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* simple long counter wrapper
*/
#ifndef __AUFS_LCNT_H__
#define __AUFS_LCNT_H__
#ifdef __KERNEL__
#include "debug.h"
/*
 * Available counter backends.  Exactly one is selected at compile time via
 * AuLCntChosen, and the matching "#if AuLCntChosen == ..." section below
 * supplies au_lcnt_t and the au_lcnt_*() functions.
 */
#define AuLCntATOMIC 1
#define AuLCntPCPUCNT 2
/*
 * why does percpu_refcount require extra synchronize_rcu()s in
 * au_br_do_free()
 */
#define AuLCntPCPUREF 3
/* currently selected backend: percpu_counter */
/* #define AuLCntChosen AuLCntATOMIC */
#define AuLCntChosen AuLCntPCPUCNT
/* #define AuLCntChosen AuLCntPCPUREF */
#if AuLCntChosen == AuLCntATOMIC
/* au_lcnt_t backed by a plain atomic_long_t */
#include <linux/atomic.h>

typedef atomic_long_t au_lcnt_t;

/* Reset the counter to zero; @release is unused.  Always returns 0. */
static inline int au_lcnt_init(au_lcnt_t *cnt, void *release __maybe_unused)
{
	atomic_long_set(cnt, 0);
	return 0;
}

/* Nothing to wait for with this backend. */
static inline void au_lcnt_wait_for_fin(au_lcnt_t *cnt __maybe_unused)
{
	/* empty */
}

/* Nothing to release with this backend. */
static inline void au_lcnt_fin(au_lcnt_t *cnt __maybe_unused,
			       int do_sync __maybe_unused)
{
	/* empty */
}

static inline void au_lcnt_inc(au_lcnt_t *cnt)
{
	atomic_long_inc(cnt);
}

static inline void au_lcnt_dec(au_lcnt_t *cnt)
{
	atomic_long_dec(cnt);
}

/* Current value; @do_rev is unused. */
static inline long au_lcnt_read(au_lcnt_t *cnt, int do_rev __maybe_unused)
{
	long value = atomic_long_read(cnt);

	return value;
}
#endif
#if AuLCntChosen == AuLCntPCPUCNT
/* au_lcnt_t backed by a struct percpu_counter */
#include <linux/percpu_counter.h>

typedef struct percpu_counter au_lcnt_t;

/*
 * Set up the per-cpu counter starting at zero; @release is unused.
 * Returns whatever percpu_counter_init() returns.
 */
static inline int au_lcnt_init(au_lcnt_t *cnt, void *release __maybe_unused)
{
	return percpu_counter_init(cnt, 0, GFP_NOFS);
}

/* Nothing to wait for with this backend. */
static inline void au_lcnt_wait_for_fin(au_lcnt_t *cnt __maybe_unused)
{
	/* empty */
}

/* Destroy the per-cpu counter; @do_sync is unused. */
static inline void au_lcnt_fin(au_lcnt_t *cnt, int do_sync __maybe_unused)
{
	percpu_counter_destroy(cnt);
}

static inline void au_lcnt_inc(au_lcnt_t *cnt)
{
	percpu_counter_inc(cnt);
}

static inline void au_lcnt_dec(au_lcnt_t *cnt)
{
	percpu_counter_dec(cnt);
}

/*
 * Sum of the counter.  A negative sum is a BUG.  Where long is narrower than
 * long long, a sum above LONG_MAX is reported once as "wrap-around" before
 * being returned as long.  @do_rev is unused.
 */
static inline long au_lcnt_read(au_lcnt_t *cnt, int do_rev __maybe_unused)
{
	s64 sum = percpu_counter_sum(cnt);

	BUG_ON(sum < 0);
	if (LONG_MAX != LLONG_MAX && sum > LONG_MAX)
		AuWarn1("%s\n", "wrap-around");

	return sum;
}
#endif
#if AuLCntChosen == AuLCntPCPUREF
/* au_lcnt_t backed by a struct percpu_ref */
#include <linux/percpu-refcount.h>

typedef struct percpu_ref au_lcnt_t;

/*
 * Initialize the reference counter.  A NULL @release selects
 * percpu_ref_exit as the release callback.
 */
static inline int au_lcnt_init(au_lcnt_t *cnt, percpu_ref_func_t *release)
{
	percpu_ref_func_t *fn = release ? release : percpu_ref_exit;

	return percpu_ref_init(cnt, fn, /*percpu mode*/0, GFP_NOFS);
}

static inline void au_lcnt_wait_for_fin(au_lcnt_t *cnt __maybe_unused)
{
	synchronize_rcu();
}

/* Kill the reference and, when @do_sync is set, wait for it to finish. */
static inline void au_lcnt_fin(au_lcnt_t *cnt, int do_sync)
{
	percpu_ref_kill(cnt);
	if (!do_sync)
		return;
	au_lcnt_wait_for_fin(cnt);
}

static inline void au_lcnt_inc(au_lcnt_t *cnt)
{
	percpu_ref_get(cnt);
}

static inline void au_lcnt_dec(au_lcnt_t *cnt)
{
	percpu_ref_put(cnt);
}

/*
 * Avoid calling this function whenever possible: it switches the reference
 * to atomic mode (synchronously) in order to read it, and only switches back
 * to per-cpu mode when @do_rev is set.
 */
static inline long au_lcnt_read(au_lcnt_t *cnt, int do_rev)
{
	long raw;

	percpu_ref_switch_to_atomic_sync(cnt);
	raw = atomic_long_read(&cnt->count);
	if (do_rev)
		percpu_ref_switch_to_percpu(cnt);

	/* percpu_ref is initialized by 1 instead of 0 */
	return raw - 1;
}
#endif
/*
 * AuLCntZero(val): debug aid.  With CONFIG_AUFS_DEBUG it evaluates @val once
 * into a long and, when non-zero, prints the expression text and its value
 * via AuDbg().  Without CONFIG_AUFS_DEBUG it expands to an empty statement
 * and @val is not evaluated at all.
 */
#ifdef CONFIG_AUFS_DEBUG
#define AuLCntZero(val) do { \
long l = val; \
if (l) \
AuDbg("%s = %ld\n", #val, l); \
} while (0)
#else
#define AuLCntZero(val) do {} while (0)
#endif
#endif /* __KERNEL__ */
#endif /* __AUFS_LCNT_H__ */

148
fs/aufs/loop.c Normal file
View File

@ -0,0 +1,148 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2005-2020 Junjiro R. Okajima
*
* This program, aufs is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* support for loopback block device as a branch
*/
#include "aufs.h"
/* added into drivers/block/loop.c */
static struct file *(*backing_file_func)(struct super_block *sb);
/*
* test if two lower dentries have overlapping branches.
*/
int au_test_loopback_overlap(struct super_block *sb, struct dentry *h_adding)
{
struct super_block *h_sb;
struct file *backing_file;
if (unlikely(!backing_file_func)) {
/* don't load "loop" module here */
backing_file_func = symbol_get(loop_backing_file);
if (unlikely(!backing_file_func))
/* "loop" module is not loaded */
return 0;
}
h_sb = h_adding->d_sb;
backing_file = backing_file_func(h_sb);
if (!backing_file)
return 0;
h_adding = backing_file->f_path.dentry;
/*
* h_adding can be local NFS.
* in this case aufs cannot detect the loop.
*/
if (unlikely(h_adding->d_sb == sb))
return 1;
return !!au_test_subdir(h_adding, sb->s_root);
}
/*
 * true if a kernel thread named 'loop[0-9].*' accesses a file
 *
 * Returns 1 when the current task is a kernel thread whose name starts with
 * "loop" followed by a decimal digit, 0 otherwise.
 */
int au_test_loopback_kthread(void)
{
	struct task_struct *tsk = current;
	char comm[sizeof(tsk->comm)];
	char digit;

	if (!(tsk->flags & PF_KTHREAD))
		return 0;

	get_task_comm(comm, tsk);
	digit = comm[4];
	return '0' <= digit && digit <= '9' && !strncmp(comm, "loop", 4);
}
/* ---------------------------------------------------------------------- */
#define au_warn_loopback_step 16
static int au_warn_loopback_nelem = au_warn_loopback_step;
static unsigned long *au_warn_loopback_array;
/*
 * Print a one-time warning for a lower filesystem type used as a loopback
 * branch.
 *
 * The magic numbers already warned about are remembered in
 * au_warn_loopback_array, which is filled from index 0 upwards and padded
 * with zeroes (kcalloc() at init, au_kzrealloc() on growth).  If @h_sb's
 * magic is already recorded nothing is printed; otherwise it is stored,
 * growing the array by au_warn_loopback_step entries when it is full, and
 * the warning is printed.  If the array cannot be grown the failure is
 * reported and the warning is skipped.
 */
void au_warn_loopback(struct super_block *h_sb)
{
	int i, new_nelem;
	unsigned long *a, magic;
	static DEFINE_SPINLOCK(spin);

	magic = h_sb->s_magic;
	spin_lock(&spin);
	a = au_warn_loopback_array;
	/*
	 * Stop at the first unused (zero) slot.  The test must look at a[i]:
	 * testing a[0] would keep scanning past the free slots once the
	 * first entry is set, and every new magic would then force the array
	 * to be enlarged although free slots remain.
	 */
	for (i = 0; i < au_warn_loopback_nelem && a[i]; i++)
		if (a[i] == magic) {
			spin_unlock(&spin);
			return;
		}

	/* h_sb is new to us, print it */
	if (i < au_warn_loopback_nelem) {
		a[i] = magic;
		goto pr;
	}

	/* expand the array; GFP_ATOMIC since the spinlock is held */
	new_nelem = au_warn_loopback_nelem + au_warn_loopback_step;
	a = au_kzrealloc(au_warn_loopback_array,
			 au_warn_loopback_nelem * sizeof(unsigned long),
			 new_nelem * sizeof(unsigned long), GFP_ATOMIC,
			 /*may_shrink*/0);
	if (a) {
		au_warn_loopback_nelem = new_nelem;
		au_warn_loopback_array = a;
		a[i] = magic;
		goto pr;
	}

	spin_unlock(&spin);
	AuWarn1("realloc failed, ignored\n");
	return;

pr:
	spin_unlock(&spin);
	pr_warn("you may want to try another patch for loopback file "
		"on %s(0x%lx) branch\n", au_sbtype(h_sb), magic);
}
/*
 * Allocate the zero-filled array that remembers which superblock magics have
 * already been warned about.  Returns 0 on success or -ENOMEM.
 */
int au_loopback_init(void)
{
	struct super_block *sb __maybe_unused;

	/* the array elements must be as wide as sb->s_magic */
	BUILD_BUG_ON(sizeof(sb->s_magic) != sizeof(*au_warn_loopback_array));

	au_warn_loopback_array = kcalloc(au_warn_loopback_step,
					 sizeof(unsigned long), GFP_NOFS);
	if (unlikely(!au_warn_loopback_array))
		return -ENOMEM;

	return 0;
}
/*
 * Undo au_loopback_init() and au_test_loopback_overlap(): drop the reference
 * on loop_backing_file if symbol_get() ever succeeded, then free the
 * warned-magic array.
 */
void au_loopback_fin(void)
{
	if (backing_file_func)
		symbol_put(loop_backing_file);

	au_kfree_try_rcu(au_warn_loopback_array);
}

55
fs/aufs/loop.h Normal file
View File

@ -0,0 +1,55 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2005-2020 Junjiro R. Okajima
*
* This program, aufs is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* support for loopback mount as a branch
*/
#ifndef __AUFS_LOOP_H__
#define __AUFS_LOOP_H__
#ifdef __KERNEL__
struct dentry;
struct super_block;
/*
 * Loopback-branch support.  With CONFIG_AUFS_BDEV_LOOP the functions below
 * are implemented in loop.c (and loop_backing_file() in the loop driver);
 * otherwise AuStub*() stand-ins with the same names are used.
 */
#ifdef CONFIG_AUFS_BDEV_LOOP
/* drivers/block/loop.c */
struct file *loop_backing_file(struct super_block *sb);
/* loop.c */
int au_test_loopback_overlap(struct super_block *sb, struct dentry *h_adding);
int au_test_loopback_kthread(void);
void au_warn_loopback(struct super_block *h_sb);
int au_loopback_init(void);
void au_loopback_fin(void);
#else
AuStub(struct file *, loop_backing_file, return NULL, struct super_block *sb)
AuStubInt0(au_test_loopback_overlap, struct super_block *sb,
struct dentry *h_adding)
AuStubInt0(au_test_loopback_kthread, void)
AuStubVoid(au_warn_loopback, struct super_block *h_sb)
AuStubInt0(au_loopback_init, void)
AuStubVoid(au_loopback_fin, void)
#endif /* CONFIG_AUFS_BDEV_LOOP */
#endif /* __KERNEL__ */
#endif /* __AUFS_LOOP_H__ */

31
fs/aufs/magic.mk Normal file
View File

@ -0,0 +1,31 @@
# SPDX-License-Identifier: GPL-2.0
# Superblock magic numbers passed to the compiler as -D definitions.
# Each value is added to ccflags-y only when the corresponding filesystem
# is configured; the comment above each entry names the source file the
# value is taken from, so the two must be kept in sync by hand.
# defined in ${srctree}/fs/fuse/inode.c
# tristate
ifdef CONFIG_FUSE_FS
ccflags-y += -DFUSE_SUPER_MAGIC=0x65735546
endif
# defined in ${srctree}/fs/xfs/xfs_sb.h
# tristate
ifdef CONFIG_XFS_FS
ccflags-y += -DXFS_SB_MAGIC=0x58465342
endif
# defined in ${srctree}/fs/configfs/mount.c
# tristate
ifdef CONFIG_CONFIGFS_FS
ccflags-y += -DCONFIGFS_MAGIC=0x62656570
endif
# defined in ${srctree}/fs/ubifs/ubifs.h
# tristate
ifdef CONFIG_UBIFS_FS
ccflags-y += -DUBIFS_SUPER_MAGIC=0x24051905
endif
# defined in ${srctree}/fs/hfsplus/hfsplus_raw.h
# tristate
ifdef CONFIG_HFSPLUS_FS
ccflags-y += -DHFSPLUS_SUPER_MAGIC=0x482b
endif

273
fs/aufs/module.c Normal file
View File

@ -0,0 +1,273 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2005-2020 Junjiro R. Okajima
*
* This program, aufs is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* module global variables and operations
*/
#include <linux/module.h>
#include <linux/seq_file.h>
#include "aufs.h"
/*
 * shrinkable realloc
 *
 * @p:          current buffer, may be NULL
 * @new_sz:     requested size in bytes (must be non-zero when @p is set)
 * @gfp:        allocation flags
 * @may_shrink: when non-zero, a smaller @new_sz really moves the data into a
 *              smaller allocation; otherwise @p is returned untouched
 *
 * Nothing is done when au_kmidx_sub() reports no difference between the
 * current allocation size and @new_sz.  Growing goes through krealloc();
 * shrinking allocates a new buffer, copies @new_sz bytes and frees the old
 * one.
 *
 * Returns the resulting buffer, or NULL on allocation failure.  On a failed
 * shrink the original buffer is not freed here.
 */
void *au_krealloc(void *p, unsigned int new_sz, gfp_t gfp, int may_shrink)
{
size_t sz;
int diff;
/* sz == 0 / diff == -1 describe "no existing buffer" */
sz = 0;
diff = -1;
if (p) {
#if 0 /* unused */
if (!new_sz) {
au_kfree_rcu(p);
p = NULL;
goto out;
}
#else
AuDebugOn(!new_sz);
#endif
sz = ksize(p);
diff = au_kmidx_sub(sz, new_sz);
}
/* an existing buffer for which au_kmidx_sub() reports no difference */
if (sz && !diff)
goto out;
if (sz < new_sz)
/* expand or SLOB */
p = krealloc(p, new_sz, gfp);
else if (new_sz < sz && may_shrink) {
/* shrink */
void *q;
q = kmalloc(new_sz, gfp);
if (q) {
if (p) {
memcpy(q, p, new_sz);
au_kfree_try_rcu(p);
}
p = q;
} else
p = NULL;
}
out:
return p;
}
/*
 * au_krealloc() plus zero-fill: after a successful reallocation the bytes
 * from offset @nused up to @new_sz are cleared.  Returns the new buffer or
 * NULL when au_krealloc() fails.
 */
void *au_kzrealloc(void *p, unsigned int nused, unsigned int new_sz, gfp_t gfp,
		   int may_shrink)
{
	void *q = au_krealloc(p, new_sz, gfp, may_shrink);

	if (!q || new_sz <= nused)
		return q;

	memset(q + nused, 0, new_sz - nused);
	return q;
}
/* ---------------------------------------------------------------------- */
/*
* aufs caches
*/
struct kmem_cache *au_cache[AuCache_Last];
static void au_cache_fin(void)
{
int i;
/*
* Make sure all delayed rcu free inodes are flushed before we
* destroy cache.
*/
rcu_barrier();
/* excluding AuCache_HNOTIFY */
BUILD_BUG_ON(AuCache_HNOTIFY + 1 != AuCache_Last);
for (i = 0; i < AuCache_HNOTIFY; i++) {
kmem_cache_destroy(au_cache[i]);
au_cache[i] = NULL;
}
}
/*
 * Create the aufs kmem caches in order: DINFO, ICNTNR, FINFO, VDIR, DEHSTR.
 * Each cache is only created when the previous one exists, so the first
 * failure leaves all later au_cache[] slots untouched; this relies on the
 * slots being NULL beforehand (aufs_init() clears the array first).
 *
 * Returns 0 when all five caches exist, otherwise tears down whatever was
 * created and returns -ENOMEM.
 */
static int __init au_cache_init(void)
{
au_cache[AuCache_DINFO] = AuCacheCtor(au_dinfo, au_di_init_once);
if (au_cache[AuCache_DINFO])
/* SLAB_DESTROY_BY_RCU */
au_cache[AuCache_ICNTNR] = AuCacheCtor(au_icntnr,
au_icntnr_init_once);
if (au_cache[AuCache_ICNTNR])
au_cache[AuCache_FINFO] = AuCacheCtor(au_finfo,
au_fi_init_once);
if (au_cache[AuCache_FINFO])
au_cache[AuCache_VDIR] = AuCache(au_vdir);
if (au_cache[AuCache_VDIR])
au_cache[AuCache_DEHSTR] = AuCache(au_vdir_dehstr);
/* the last cache exists only if every earlier one does */
if (au_cache[AuCache_DEHSTR])
return 0;
au_cache_fin();
return -ENOMEM;
}
/* ---------------------------------------------------------------------- */
int au_dir_roflags;
#ifdef CONFIG_AUFS_SBILIST
/*
* iterate_supers_type() doesn't protect us from
* remounting (branch management)
*/
struct hlist_bl_head au_sbilist;
#endif
/*
* functions for module interface.
*/
MODULE_LICENSE("GPL");
/* MODULE_LICENSE("GPL v2"); */
MODULE_AUTHOR("Junjiro R. Okajima <aufs-users@lists.sourceforge.net>");
MODULE_DESCRIPTION(AUFS_NAME
" -- Advanced multi layered unification filesystem");
MODULE_VERSION(AUFS_VERSION);
MODULE_ALIAS_FS(AUFS_NAME);
/* this module parameter has no meaning when SYSFS is disabled */
int sysaufs_brs = 1;
MODULE_PARM_DESC(brs, "use <sysfs>/fs/aufs/si_*/brN");
module_param_named(brs, sysaufs_brs, int, 0444);
/* this module parameter has no meaning when USER_NS is disabled */
bool au_userns;
MODULE_PARM_DESC(allow_userns, "allow unprivileged to mount under userns");
module_param_named(allow_userns, au_userns, bool, 0444);
/* ---------------------------------------------------------------------- */
static char au_esc_chars[0x20 + 3]; /* 0x01-0x20, backslash, del, and NULL */
/*
 * Print @path into @seq, escaping the characters listed in au_esc_chars.
 * Returns 0 on success and -ENOMEM when seq_path() reports an error.
 */
int au_seq_path(struct seq_file *seq, struct path *path)
{
	int n = seq_path(seq, path, au_esc_chars);

	return n >= 0 ? 0 : -ENOMEM;
}
/* ---------------------------------------------------------------------- */
/*
 * Module entry point.
 *
 * Builds the escape-character table used by au_seq_path(), prepares the
 * "no getattr" copy of the inode operations, then brings up the subsystems
 * in order (sysaufs, dbgaufs, procfs, workqueue, loopback, hnotify, sysrq,
 * kmem caches) and finally registers the filesystem type.  On failure the
 * subsystems initialized so far are torn down in reverse order through the
 * out_* labels.
 *
 * Returns 0 on success or the negative errno of the step that failed.
 */
static int __init aufs_init(void)
{
int err, i;
char *p;
/* au_esc_chars = 0x01..0x20 (space), backslash, DEL, terminating NUL */
p = au_esc_chars;
for (i = 1; i <= ' '; i++)
*p++ = i;
*p++ = '\\';
*p++ = '\x7f';
*p = 0;
au_dir_roflags = au_file_roflags(O_DIRECTORY | O_LARGEFILE);
/* aufs_iop_nogetattr[] is aufs_iop[] with every ->getattr cleared */
memcpy(aufs_iop_nogetattr, aufs_iop, sizeof(aufs_iop));
for (i = 0; i < AuIop_Last; i++)
aufs_iop_nogetattr[i].getattr = NULL;
memset(au_cache, 0, sizeof(au_cache)); /* including hnotify */
au_sbilist_init();
sysaufs_brs_init();
au_debug_init();
au_dy_init();
/*
 * NOTE(review): when sysaufs_init() fails we jump to "out" and
 * au_dy_fin() is not called although au_dy_init() ran -- confirm that
 * au_dy_init() acquires nothing that needs releasing.
 */
err = sysaufs_init();
if (unlikely(err))
goto out;
err = dbgaufs_init();
if (unlikely(err))
goto out_sysaufs;
err = au_procfs_init();
if (unlikely(err))
goto out_dbgaufs;
err = au_wkq_init();
if (unlikely(err))
goto out_procfs;
err = au_loopback_init();
if (unlikely(err))
goto out_wkq;
err = au_hnotify_init();
if (unlikely(err))
goto out_loopback;
err = au_sysrq_init();
if (unlikely(err))
goto out_hin;
err = au_cache_init();
if (unlikely(err))
goto out_sysrq;
/* FS_USERNS_MOUNT is only set when the allow_userns parameter is on */
aufs_fs_type.fs_flags |= au_userns ? FS_USERNS_MOUNT : 0;
err = register_filesystem(&aufs_fs_type);
if (unlikely(err))
goto out_cache;
/* since we define pr_fmt, call printk directly */
printk(KERN_INFO AUFS_NAME " " AUFS_VERSION "\n");
goto out; /* success */
/* error unwinding: each label undoes one step and falls through */
out_cache:
au_cache_fin();
out_sysrq:
au_sysrq_fin();
out_hin:
au_hnotify_fin();
out_loopback:
au_loopback_fin();
out_wkq:
au_wkq_fin();
out_procfs:
au_procfs_fin();
out_dbgaufs:
dbgaufs_fin();
out_sysaufs:
sysaufs_fin();
au_dy_fin();
out:
return err;
}
/*
 * Module exit point: unregister the filesystem type first, then tear the
 * subsystems down in the same order as the error-unwinding path of
 * aufs_init().
 */
static void __exit aufs_exit(void)
{
unregister_filesystem(&aufs_fs_type);
au_cache_fin();
au_sysrq_fin();
au_hnotify_fin();
au_loopback_fin();
au_wkq_fin();
au_procfs_fin();
dbgaufs_fin();
sysaufs_fin();
au_dy_fin();
}
module_init(aufs_init);
module_exit(aufs_exit);

166
fs/aufs/module.h Normal file
View File

@ -0,0 +1,166 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2005-2020 Junjiro R. Okajima
*
* This program, aufs is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* module initialization and module-global
*/
#ifndef __AUFS_MODULE_H__
#define __AUFS_MODULE_H__
#ifdef __KERNEL__
#include <linux/slab.h>
#include "debug.h"
#include "dentry.h"
#include "dir.h"
#include "file.h"
#include "inode.h"
struct path;
struct seq_file;
/* module parameters */
extern int sysaufs_brs;
extern bool au_userns;
/* ---------------------------------------------------------------------- */
extern int au_dir_roflags;
void *au_krealloc(void *p, unsigned int new_sz, gfp_t gfp, int may_shrink);
void *au_kzrealloc(void *p, unsigned int nused, unsigned int new_sz, gfp_t gfp,
int may_shrink);
/*
 * Comparing the size of the object with sizeof(struct rcu_head)
 * case 1: object is always larger
 * --> au_kfree_rcu() or au_kfree_do_rcu()
 * case 2: object is always smaller
 * --> au_kfree_small()
 * case 3: object can be any size
 * --> au_kfree_try_rcu()
 */
/*
 * Free @p through kfree_rcu().  The object is viewed as a structure whose
 * first member is a struct rcu_head, so the rcu_head handed to kfree_rcu()
 * overlays the start of the object itself; this is why the object must be at
 * least sizeof(struct rcu_head) bytes (case 1 above).  No NULL or size check
 * is done here.
 */
static inline void au_kfree_do_rcu(const void *p)
{
struct {
struct rcu_head rcu;
} *a = (void *)p;
kfree_rcu(a, rcu);
}
/*
 * au_kfree_rcu(_p): RCU-deferred free for objects whose type is known at
 * compile time.  The build fails when the pointed-to type is smaller than
 * struct rcu_head; a NULL pointer is ignored.  @_p is evaluated once.
 */
#define au_kfree_rcu(_p) do { \
typeof(_p) p = (_p); \
BUILD_BUG_ON(sizeof(*p) < sizeof(struct rcu_head)); \
if (p) \
au_kfree_do_rcu(p); \
} while (0)
/*
 * au_kfree_do_sz_test(sz): true when @sz bytes are enough to hold a
 * struct rcu_head.
 * au_kfree_sz_test(p): true when @p is non-NULL and its allocation (as
 * reported by ksize()) is large enough for a struct rcu_head.
 * The arguments are parenthesized so that expressions containing operators
 * of lower precedence than '>=' or '&&' are evaluated as a whole.
 */
#define au_kfree_do_sz_test(sz) ((sz) >= sizeof(struct rcu_head))
#define au_kfree_sz_test(p) ((p) && au_kfree_do_sz_test(ksize(p)))
/*
 * Free an object of unknown size: defer the free through RCU when the
 * allocation is large enough to hold a struct rcu_head, otherwise free it
 * immediately.  NULL is ignored.
 */
static inline void au_kfree_try_rcu(const void *p)
{
	if (!p)
		return;

	if (!au_kfree_sz_test(p)) {
		kfree(p);
		return;
	}
	au_kfree_do_rcu(p);
}
/*
 * Immediately free an object expected to be smaller than struct rcu_head;
 * AuDebugOn() checks that expectation.  NULL is ignored.
 */
static inline void au_kfree_small(const void *p)
{
	if (p) {
		AuDebugOn(au_kfree_sz_test(p));
		kfree(p);
	}
}
/*
 * Difference between the kmalloc_index() values of @sz and @new_sz; zero
 * means both sizes map to the same index.  With CONFIG_SLOB the result is
 * always -1 (never "same").
 */
static inline int au_kmidx_sub(size_t sz, size_t new_sz)
{
#ifndef CONFIG_SLOB
return kmalloc_index(sz) - kmalloc_index(new_sz);
#else
return -1; /* SLOB is untested */
#endif
}
/* NOTE(review): presumably prints @path into @seq -- confirm at the definition */
int au_seq_path(struct seq_file *seq, struct path *path);
/*
 * procfs support: real functions when CONFIG_PROC_FS is set, otherwise
 * stubs generated by AuStubInt0()/AuStubVoid().
 */
#ifdef CONFIG_PROC_FS
/* procfs.c */
int __init au_procfs_init(void);
void au_procfs_fin(void);
#else
AuStubInt0(au_procfs_init, void);
AuStubVoid(au_procfs_fin, void);
#endif
/* ---------------------------------------------------------------------- */
/*
 * kmem cache
 * indices into au_cache[]; AuCache_Last is the number of entries and is
 * used as the array size.
 */
enum {
AuCache_DINFO,
AuCache_ICNTNR,
AuCache_FINFO,
AuCache_VDIR,
AuCache_DEHSTR,
AuCache_HNOTIFY, /* must be last */
AuCache_Last
};
/* one kmem_cache pointer per AuCache_* index */
extern struct kmem_cache *au_cache[AuCache_Last];
/* slab flags shared by every aufs cache */
#define AuCacheFlags (SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD)
/* create a cache for 'struct type' via KMEM_CACHE() with AuCacheFlags */
#define AuCache(type) KMEM_CACHE(type, AuCacheFlags)
/*
 * same as AuCache() but with a constructor; the cache is named after the
 * type and uses the size and alignment of 'struct type'.
 */
#define AuCacheCtor(type, ctor) \
kmem_cache_create(#type, sizeof(struct type), \
__alignof__(struct type), AuCacheFlags, ctor)
/*
 * Generate the alloc/free helpers for 'struct au_<name>' backed by
 * au_cache[AuCache_<index>]:
 *   au_cache_alloc_<name>()       allocate with GFP_NOFS
 *   au_cache_free_<name>_norcu()  free immediately
 *   au_cache_free_<name>_rcu_cb() RCU callback; steps back from the
 *                                 embedded 'rcu' member to the start of
 *                                 the object and frees it
 *   au_cache_free_<name>_rcu()    free via call_rcu(); the build fails
 *                                 if the struct is smaller than
 *                                 struct rcu_head
 *   au_cache_free_<name>()        default free, currently the RCU one
 * The struct must have a member named 'rcu' of type struct rcu_head.
 */
#define AuCacheFuncs(name, index) \
static inline struct au_##name *au_cache_alloc_##name(void) \
{ return kmem_cache_alloc(au_cache[AuCache_##index], GFP_NOFS); } \
static inline void au_cache_free_##name##_norcu(struct au_##name *p) \
{ kmem_cache_free(au_cache[AuCache_##index], p); } \
\
static inline void au_cache_free_##name##_rcu_cb(struct rcu_head *rcu) \
{ void *p = rcu; \
p -= offsetof(struct au_##name, rcu); \
kmem_cache_free(au_cache[AuCache_##index], p); } \
static inline void au_cache_free_##name##_rcu(struct au_##name *p) \
{ BUILD_BUG_ON(sizeof(struct au_##name) < sizeof(struct rcu_head)); \
call_rcu(&p->rcu, au_cache_free_##name##_rcu_cb); } \
\
static inline void au_cache_free_##name(struct au_##name *p) \
{ /* au_cache_free_##name##_norcu(p); */ \
au_cache_free_##name##_rcu(p); }
/* instantiate the helpers for each cached type */
AuCacheFuncs(dinfo, DINFO);
AuCacheFuncs(icntnr, ICNTNR);
AuCacheFuncs(finfo, FINFO);
AuCacheFuncs(vdir, VDIR);
AuCacheFuncs(vdir_dehstr, DEHSTR);
/* the hnotify helpers exist only when the feature is configured */
#ifdef CONFIG_AUFS_HNOTIFY
AuCacheFuncs(hnotify, HNOTIFY);
#endif
#endif /* __KERNEL__ */
#endif /* __AUFS_MODULE_H__ */

706
fs/aufs/mvdown.c Normal file
View File

@ -0,0 +1,706 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2011-2020 Junjiro R. Okajima
*
* This program, aufs is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* move-down, opposite of copy-up
*/
#include "aufs.h"
/*
 * working set of a single move-down operation.
 * allocated in au_mvdown() and passed to every helper in this file.
 */
struct au_mvd_args {
/* per-branch state, indexed by AUFS_MVDOWN_UPPER/AUFS_MVDOWN_LOWER */
struct {
struct super_block *h_sb;
struct dentry *h_parent;
struct au_hinode *hdir;
struct inode *h_dir, *h_inode;
struct au_pin pin;
} info[AUFS_MVDOWN_NARRAY];
/* kernel copy of the user argument; copied back to the user at the end */
struct aufs_mvdown mvdown;
/* the aufs objects being moved down and their parent */
struct dentry *dentry, *parent;
struct inode *inode, *dir;
struct super_block *sb;
/*
 * bopq: value of au_dbdiropq() for the parent
 * bwh, bfound: results of the lookup below the source branch, -1 when
 * unset (see au_mvd_args_intermediate())
 */
aufs_bindex_t bopq, bwh, bfound;
/* non-zero when au_do_lock() took the rename lock */
unsigned char rename_lock;
};
/*
 * shorthands for the members of struct au_mvd_args, to be used as
 * a->mvd_xxx.  "src" is the AUFS_MVDOWN_UPPER slot and "dst" is the
 * AUFS_MVDOWN_LOWER slot.
 */
#define mvd_errno mvdown.au_errno
#define mvd_bsrc mvdown.stbr[AUFS_MVDOWN_UPPER].bindex
#define mvd_src_brid mvdown.stbr[AUFS_MVDOWN_UPPER].brid
#define mvd_bdst mvdown.stbr[AUFS_MVDOWN_LOWER].bindex
#define mvd_dst_brid mvdown.stbr[AUFS_MVDOWN_LOWER].brid
#define mvd_h_src_sb info[AUFS_MVDOWN_UPPER].h_sb
#define mvd_h_src_parent info[AUFS_MVDOWN_UPPER].h_parent
#define mvd_hdir_src info[AUFS_MVDOWN_UPPER].hdir
#define mvd_h_src_dir info[AUFS_MVDOWN_UPPER].h_dir
#define mvd_h_src_inode info[AUFS_MVDOWN_UPPER].h_inode
#define mvd_pin_src info[AUFS_MVDOWN_UPPER].pin
#define mvd_h_dst_sb info[AUFS_MVDOWN_LOWER].h_sb
#define mvd_h_dst_parent info[AUFS_MVDOWN_LOWER].h_parent
#define mvd_hdir_dst info[AUFS_MVDOWN_LOWER].hdir
#define mvd_h_dst_dir info[AUFS_MVDOWN_LOWER].h_dir
#define mvd_h_dst_inode info[AUFS_MVDOWN_LOWER].h_inode
#define mvd_pin_dst info[AUFS_MVDOWN_LOWER].pin
/* pr_err() the message only when @flag is set (the user asked for it) */
#define AU_MVD_PR(flag, ...) do { \
if (flag) \
pr_err(__VA_ARGS__); \
} while (0)
/*
 * Search the branches below a->mvd_bsrc, top to bottom, for the first
 * acceptable destination and return its index, or -1 when there is none.
 * What "acceptable" means depends on the request flags:
 * - AUFS_MVDOWN_FHSM_LOWER: a branch with the fhsm attribute whose
 *   super_block is not read-only.
 * - otherwise, without AUFS_MVDOWN_ROLOWER: a branch for which
 *   au_br_rdonly() is false.
 * - otherwise: a branch whose super_block is not read-only; when
 *   au_br_rdonly() is true for it, AUFS_MVDOWN_ROLOWER_R is set in the
 *   request flags before returning.
 */
static int find_lower_writable(struct au_mvd_args *a)
{
	struct super_block *sb = a->sb;
	struct au_branch *br;
	aufs_bindex_t bindex, bbot;
	const int fhsm_only = !!(a->mvdown.flags & AUFS_MVDOWN_FHSM_LOWER);
	const int allow_ro = !!(a->mvdown.flags & AUFS_MVDOWN_ROLOWER);

	bindex = a->mvd_bsrc;
	bbot = au_sbbot(sb);
	for (bindex++; bindex <= bbot; bindex++) {
		br = au_sbr(sb, bindex);
		if (fhsm_only) {
			if (au_br_fhsm(br->br_perm)
			    && !sb_rdonly(au_br_sb(br)))
				return bindex;
		} else if (!allow_ro) {
			if (!au_br_rdonly(br))
				return bindex;
		} else if (!sb_rdonly(au_br_sb(br))) {
			if (au_br_rdonly(br))
				a->mvdown.flags |= AUFS_MVDOWN_ROLOWER_R;
			return bindex;
		}
	}

	return -1;
}
/*
 * make the parent dir on bdst.
 * Records the hinode of the parent on both branches and the host parent
 * dentry on bsrc.  When the parent has no host dentry on bdst yet,
 * au_cpdown_dirs() is called and the host dentry is fetched again.
 * Returns zero or the error from au_cpdown_dirs().
 */
static int au_do_mkdir(const unsigned char dmsg, struct au_mvd_args *a)
{
	int err = 0;

	a->mvd_hdir_src = au_hi(a->dir, a->mvd_bsrc);
	a->mvd_hdir_dst = au_hi(a->dir, a->mvd_bdst);
	a->mvd_h_src_parent = au_h_dptr(a->parent, a->mvd_bsrc);

	a->mvd_h_dst_parent = NULL;
	if (au_dbbot(a->parent) >= a->mvd_bdst)
		a->mvd_h_dst_parent = au_h_dptr(a->parent, a->mvd_bdst);
	if (a->mvd_h_dst_parent)
		goto out; /* already there */

	err = au_cpdown_dirs(a->dentry, a->mvd_bdst);
	if (unlikely(err))
		AU_MVD_PR(dmsg, "cpdown_dirs failed\n");
	else
		a->mvd_h_dst_parent = au_h_dptr(a->parent, a->mvd_bdst);

out:
	AuTraceErr(err);
	return err;
}
/*
 * lock them all.
 * Pins the parent dir on the destination branch and then on the source
 * branch.  When both branches live on the same host super_block, the
 * per-pin dir locks are dropped and replaced by vfsub_lock_rename() on the
 * two host parents; a->rename_lock records which scheme is in effect so
 * that au_do_unlock() can undo it.
 * Returns zero with both sides pinned, or an error with nothing pinned.
 */
static int au_do_lock(const unsigned char dmsg, struct au_mvd_args *a)
{
int err;
struct dentry *h_trap;
a->mvd_h_src_sb = au_sbr_sb(a->sb, a->mvd_bsrc);
a->mvd_h_dst_sb = au_sbr_sb(a->sb, a->mvd_bdst);
err = au_pin(&a->mvd_pin_dst, a->dentry, a->mvd_bdst,
au_opt_udba(a->sb),
AuPin_MNT_WRITE | AuPin_DI_LOCKED);
AuTraceErr(err);
if (unlikely(err)) {
AU_MVD_PR(dmsg, "pin_dst failed\n");
goto out;
}
/* different host filesystems: pin the source with its own lock subclass */
if (a->mvd_h_src_sb != a->mvd_h_dst_sb) {
a->rename_lock = 0;
au_pin_init(&a->mvd_pin_src, a->dentry, a->mvd_bsrc,
AuLsc_DI_PARENT, AuLsc_I_PARENT3,
au_opt_udba(a->sb),
AuPin_MNT_WRITE | AuPin_DI_LOCKED);
err = au_do_pin(&a->mvd_pin_src);
AuTraceErr(err);
a->mvd_h_src_dir = d_inode(a->mvd_h_src_parent);
if (unlikely(err)) {
AU_MVD_PR(dmsg, "pin_src failed\n");
goto out_dst;
}
goto out; /* success */
}
/* same host filesystem: release the dst dir lock before pinning the src */
a->rename_lock = 1;
au_pin_hdir_unlock(&a->mvd_pin_dst);
err = au_pin(&a->mvd_pin_src, a->dentry, a->mvd_bsrc,
au_opt_udba(a->sb),
AuPin_MNT_WRITE | AuPin_DI_LOCKED);
AuTraceErr(err);
a->mvd_h_src_dir = d_inode(a->mvd_h_src_parent);
if (unlikely(err)) {
AU_MVD_PR(dmsg, "pin_src failed\n");
/* re-take the dst dir lock so that au_unpin() below is balanced */
au_pin_hdir_lock(&a->mvd_pin_dst);
goto out_dst;
}
au_pin_hdir_unlock(&a->mvd_pin_src);
h_trap = vfsub_lock_rename(a->mvd_h_src_parent, a->mvd_hdir_src,
a->mvd_h_dst_parent, a->mvd_hdir_dst);
/* a trap other than one of the two parents is fatal */
if (h_trap) {
err = (h_trap != a->mvd_h_src_parent);
if (err)
err = (h_trap != a->mvd_h_dst_parent);
}
BUG_ON(err); /* it should never happen */
/* the source dir changed while it was unlocked: undo everything */
if (unlikely(a->mvd_h_src_dir != au_pinned_h_dir(&a->mvd_pin_src))) {
err = -EBUSY;
AuTraceErr(err);
vfsub_unlock_rename(a->mvd_h_src_parent, a->mvd_hdir_src,
a->mvd_h_dst_parent, a->mvd_hdir_dst);
au_pin_hdir_lock(&a->mvd_pin_src);
au_unpin(&a->mvd_pin_src);
au_pin_hdir_lock(&a->mvd_pin_dst);
goto out_dst;
}
goto out; /* success */
out_dst:
au_unpin(&a->mvd_pin_dst);
out:
AuTraceErr(err);
return err;
}
/*
 * Undo au_do_lock().
 * With the rename lock held, it is released first and the per-pin dir
 * locks are re-taken so that both au_unpin() calls are balanced.
 * Without it, the two pins are simply released, source first.
 */
static void au_do_unlock(const unsigned char dmsg, struct au_mvd_args *a)
{
	if (a->rename_lock) {
		vfsub_unlock_rename(a->mvd_h_src_parent, a->mvd_hdir_src,
				    a->mvd_h_dst_parent, a->mvd_hdir_dst);
		au_pin_hdir_lock(&a->mvd_pin_src);
		au_unpin(&a->mvd_pin_src);
		au_pin_hdir_lock(&a->mvd_pin_dst);
	} else {
		au_unpin(&a->mvd_pin_src);
	}

	au_unpin(&a->mvd_pin_dst);
}
/* copy-down the file */
static int au_do_cpdown(const unsigned char dmsg, struct au_mvd_args *a)
{
int err;
struct au_cp_generic cpg = {
.dentry = a->dentry,
.bdst = a->mvd_bdst,
.bsrc = a->mvd_bsrc,
.len = -1,
.pin = &a->mvd_pin_dst,
.flags = AuCpup_DTIME | AuCpup_HOPEN
};
AuDbg("b%d, b%d\n", cpg.bsrc, cpg.bdst);
if (a->mvdown.flags & AUFS_MVDOWN_OWLOWER)
au_fset_cpup(cpg.flags, OVERWRITE);
if (a->mvdown.flags & AUFS_MVDOWN_ROLOWER)
au_fset_cpup(cpg.flags, RWDST);
err = au_sio_cpdown_simple(&cpg);
if (unlikely(err))
AU_MVD_PR(dmsg, "cpdown failed\n");
AuTraceErr(err);
return err;
}
/*
 * unlink the whiteout on bdst if it exists, which may have been created by
 * UDBA while we were sleeping.
 * Returns zero when there is no whiteout or it was removed, otherwise the
 * error from the lookup or the unlink.
 */
static int au_do_unlink_wh(const unsigned char dmsg, struct au_mvd_args *a)
{
int err;
struct path h_path;
struct au_branch *br;
struct inode *delegated;
br = au_sbr(a->sb, a->mvd_bdst);
h_path.dentry = au_wh_lkup(a->mvd_h_dst_parent, &a->dentry->d_name, br);
err = PTR_ERR(h_path.dentry);
if (IS_ERR(h_path.dentry)) {
AU_MVD_PR(dmsg, "wh_lkup failed\n");
goto out;
}
err = 0;
/* a positive dentry means the whiteout exists */
if (d_is_positive(h_path.dentry)) {
h_path.mnt = au_br_mnt(br);
delegated = NULL;
err = vfsub_unlink(d_inode(a->mvd_h_dst_parent), &h_path,
&delegated, /*force*/0);
/* no retry is attempted; only the inode reference is dropped */
if (unlikely(err == -EWOULDBLOCK)) {
pr_warn("cannot retry for NFSv4 delegation"
" for an internal unlink\n");
iput(delegated);
}
if (unlikely(err))
AU_MVD_PR(dmsg, "wh_unlink failed\n");
}
dput(h_path.dentry);
out:
AuTraceErr(err);
return err;
}
/*
 * unlink the topmost h_dentry, i.e. the host file on bsrc.
 * On -EWOULDBLOCK a warning is printed and the delegated inode is
 * released; no retry is attempted.  Returns the result of the unlink.
 */
static int au_do_unlink(const unsigned char dmsg, struct au_mvd_args *a)
{
	int err;
	struct inode *delegated = NULL;
	struct path h_path;

	h_path.mnt = au_sbr_mnt(a->sb, a->mvd_bsrc);
	h_path.dentry = au_h_dptr(a->dentry, a->mvd_bsrc);

	err = vfsub_unlink(a->mvd_h_src_dir, &h_path, &delegated, /*force*/0);
	if (!err)
		goto out;

	if (err == -EWOULDBLOCK) {
		pr_warn("cannot retry for NFSv4 delegation"
			" for an internal unlink\n");
		iput(delegated);
	}
	AU_MVD_PR(dmsg, "unlink failed\n");

out:
	AuTraceErr(err);
	return err;
}
/*
 * Fill the statfs results of both branches into the user argument.
 * Since mvdown has already succeeded, a failure here is not returned;
 * it is reported only through AUFS_MVDOWN_STFS_FAILED, which is set on
 * entry and cleared again once both au_br_stfs() calls have succeeded.
 * The id of the lower branch is stored as well.
 */
static void au_do_stfs(const unsigned char dmsg, struct au_mvd_args *a)
{
	int err;
	struct au_branch *br;

	a->mvdown.flags |= AUFS_MVDOWN_STFS_FAILED;

	br = au_sbr(a->sb, a->mvd_bsrc);
	err = au_br_stfs(br, &a->mvdown.stbr[AUFS_MVDOWN_UPPER].stfs);
	if (err)
		goto failed;

	br = au_sbr(a->sb, a->mvd_bdst);
	a->mvdown.stbr[AUFS_MVDOWN_LOWER].brid = br->br_id;
	err = au_br_stfs(br, &a->mvdown.stbr[AUFS_MVDOWN_LOWER].stfs);
	if (err)
		goto failed;

	a->mvdown.flags &= ~AUFS_MVDOWN_STFS_FAILED;
	return;

failed:
	AU_MVD_PR(dmsg, "statfs failed (%d), ignored\n", err);
}
/*
 * copy-down the file and unlink the bsrc file.
 * - make the parent dir on bdst and take the locks
 * - copy-down the file (with whtmp name and rename)
 * - unlink the bdst whiteout if it exists
 * - unlink the bsrc file, unless AUFS_MVDOWN_KUPPER asks to keep it
 * - update the branch range of the aufs dentry and inode
 * Returns zero or the first error met.
 */
static int au_do_mvdown(const unsigned char dmsg, struct au_mvd_args *a)
{
int err;
err = au_do_mkdir(dmsg, a);
if (!err)
err = au_do_lock(dmsg, a);
if (unlikely(err))
goto out;
/*
* do not revert the activities we made on bdst since they should be
* harmless in aufs.
*/
err = au_do_cpdown(dmsg, a);
if (!err)
err = au_do_unlink_wh(dmsg, a);
if (!err && !(a->mvdown.flags & AUFS_MVDOWN_KUPPER))
err = au_do_unlink(dmsg, a);
if (unlikely(err))
goto out_unlock;
AuDbg("%pd2, 0x%x, %d --> %d\n",
a->dentry, a->mvdown.flags, a->mvd_bsrc, a->mvd_bdst);
/* tell the user when no further destination exists below bsrc */
if (find_lower_writable(a) < 0)
a->mvdown.flags |= AUFS_MVDOWN_BOTTOM;
if (a->mvdown.flags & AUFS_MVDOWN_STFS)
au_do_stfs(dmsg, a);
/* maintain internal array */
if (!(a->mvdown.flags & AUFS_MVDOWN_KUPPER)) {
/* the upper is gone: the object now starts at bdst */
au_set_h_dptr(a->dentry, a->mvd_bsrc, NULL);
au_set_dbtop(a->dentry, a->mvd_bdst);
au_set_h_iptr(a->inode, a->mvd_bsrc, NULL, /*flags*/0);
au_set_ibtop(a->inode, a->mvd_bdst);
} else {
/* hide the lower */
au_set_h_dptr(a->dentry, a->mvd_bdst, NULL);
au_set_dbbot(a->dentry, a->mvd_bsrc);
au_set_h_iptr(a->inode, a->mvd_bdst, NULL, /*flags*/0);
au_set_ibbot(a->inode, a->mvd_bsrc);
}
/* in either case the bottom index ends up being at least bdst */
if (au_dbbot(a->dentry) < a->mvd_bdst)
au_set_dbbot(a->dentry, a->mvd_bdst);
if (au_ibbot(a->inode) < a->mvd_bdst)
au_set_ibbot(a->inode, a->mvd_bdst);
out_unlock:
au_do_unlock(dmsg, a);
out:
AuTraceErr(err);
return err;
}
/* ---------------------------------------------------------------------- */
/*
 * make sure the file is idle.
 * The file is idle when its dentry starts on bsrc, nobody else holds a
 * reference to the dentry or the inode, the inode is not pseudo-linked
 * (tested only when the PLINK option is on) and it has a single link.
 * Returns zero when idle, otherwise -EBUSY.
 */
static int au_mvd_args_busy(const unsigned char dmsg, struct au_mvd_args *a)
{
	int err = 0;
	int plinked;

	plinked = !!au_opt_test(au_mntflags(a->sb), PLINK);
	if (au_dbtop(a->dentry) != a->mvd_bsrc
	    || au_dcount(a->dentry) != 1
	    || atomic_read(&a->inode->i_count) != 1
	    /* || a->mvd_h_src_inode->i_nlink != 1 */
	    || (plinked && au_plink_test(a->inode))
	    || a->inode->i_nlink != 1) {
		err = -EBUSY;
		AU_MVD_PR(dmsg,
			  "b%d, d{b%d, c%d?}, i{c%d?, l%u}, hi{l%u}, p{%d, %d}\n",
			  a->mvd_bsrc, au_dbtop(a->dentry),
			  au_dcount(a->dentry),
			  atomic_read(&a->inode->i_count), a->inode->i_nlink,
			  a->mvd_h_src_inode->i_nlink,
			  plinked, plinked ? au_plink_test(a->inode) : 0);
	}

	AuTraceErr(err);
	return err;
}
/*
 * make sure the parent dir is fine.
 * Returns -ENOENT when au_alive_dir() reports a problem with the parent,
 * and -EINVAL with EAU_MVDOWN_OPAQUE when either au_wbr_nonopq() or the
 * parent's diropq index points above bdst.  a->bopq is set on the way.
 */
static int au_mvd_args_parent(const unsigned char dmsg,
			      struct au_mvd_args *a)
{
	int err = 0;
	aufs_bindex_t bindex;

	if (unlikely(au_alive_dir(a->parent))) {
		err = -ENOENT;
		AU_MVD_PR(dmsg, "parent dir is dead\n");
	} else {
		a->bopq = au_dbdiropq(a->parent);
		bindex = au_wbr_nonopq(a->dentry, a->mvd_bdst);
		AuDbg("b%d\n", bindex);
		if (unlikely((bindex >= 0 && bindex < a->mvd_bdst)
			     || (a->bopq != -1 && a->bopq < a->mvd_bdst))) {
			err = -EINVAL;
			a->mvd_errno = EAU_MVDOWN_OPAQUE;
			AU_MVD_PR(dmsg, "ancestor is opaque b%d, b%d\n",
				  a->bopq, a->mvd_bdst);
		}
	}

	AuTraceErr(err);
	return err;
}
/*
 * Look at what exists below bsrc and decide whether something between
 * bsrc and bdst forbids the move.
 * The lookup is done on a temporary copy of the dentry info which is
 * swapped in for the duration of au_lkup_dentry() and swapped back
 * afterwards, so the real dentry info is left as it was.
 * Sets a->bwh and a->bfound.  Returns zero, -ENOMEM, or -EINVAL with
 * a->mvd_errno set to EAU_MVDOWN_WHITEOUT or EAU_MVDOWN_UPPER.
 */
static int au_mvd_args_intermediate(const unsigned char dmsg,
struct au_mvd_args *a)
{
int err;
struct au_dinfo *dinfo, *tmp;
/* lookup the next lower positive entry */
err = -ENOMEM;
tmp = au_di_alloc(a->sb, AuLsc_DI_TMP);
if (unlikely(!tmp))
goto out;
a->bfound = -1;
a->bwh = -1;
dinfo = au_di(a->dentry);
au_di_cp(tmp, dinfo);
au_di_swap(tmp, dinfo);
/* returns the number of positive dentries */
err = au_lkup_dentry(a->dentry, a->mvd_bsrc + 1,
/* AuLkup_IGNORE_PERM */ 0);
if (!err)
a->bwh = au_dbwh(a->dentry);
else if (err > 0)
a->bfound = au_dbtop(a->dentry);
au_di_swap(tmp, dinfo);
au_rw_write_unlock(&tmp->di_rwsem);
au_di_free(tmp);
/*
 * NOTE(review): a negative lookup error is only logged here; err is
 * overwritten below, and with bfound == bwh == -1 the function then
 * returns zero.  Confirm that ignoring the failure is intended.
 */
if (unlikely(err < 0))
AU_MVD_PR(dmsg, "failed look-up lower\n");
/*
* here, we have these cases.
* bfound == -1
* no positive dentry under bsrc. there are more sub-cases.
* bwh < 0
* there is no whiteout, we can safely move-down.
* bwh <= bsrc
* impossible
* bsrc < bwh && bwh < bdst
* there is a whiteout on RO branch. cannot proceed.
* bwh == bdst
* there is a whiteout on the RW target branch. it should
* be removed.
* bdst < bwh
* there is a whiteout on some unrelated branch.
* -1 < bfound && bfound <= bsrc
* impossible.
* bfound < bdst
* found, but it is on RO branch between bsrc and bdst. cannot
* proceed.
* bfound == bdst
* found, replace it if AUFS_MVDOWN_FORCE is set. otherwise return
* error.
* bdst < bfound
* found, after we create the file on bdst, it will be hidden.
*/
/* debug-only checks for the two "impossible" cases above */
AuDebugOn(a->bfound == -1
&& a->bwh != -1
&& a->bwh <= a->mvd_bsrc);
AuDebugOn(-1 < a->bfound
&& a->bfound <= a->mvd_bsrc);
err = -EINVAL;
if (a->bfound == -1
&& a->mvd_bsrc < a->bwh
&& a->bwh != -1
&& a->bwh < a->mvd_bdst) {
a->mvd_errno = EAU_MVDOWN_WHITEOUT;
AU_MVD_PR(dmsg, "bsrc %d, bdst %d, bfound %d, bwh %d\n",
a->mvd_bsrc, a->mvd_bdst, a->bfound, a->bwh);
goto out;
} else if (a->bfound != -1 && a->bfound < a->mvd_bdst) {
a->mvd_errno = EAU_MVDOWN_UPPER;
AU_MVD_PR(dmsg, "bdst %d, bfound %d\n",
a->mvd_bdst, a->bfound);
goto out;
}
err = 0; /* success */
out:
AuTraceErr(err);
return err;
}
/*
 * Refuse to replace an existing entry on bdst unless the user asked for
 * it: returns -EEXIST when the lookup found the entry exactly on bdst
 * and AUFS_MVDOWN_OWLOWER is not set, zero otherwise.
 */
static int au_mvd_args_exist(const unsigned char dmsg, struct au_mvd_args *a)
{
	int err = 0;

	if (a->bfound == a->mvd_bdst
	    && !(a->mvdown.flags & AUFS_MVDOWN_OWLOWER))
		err = -EEXIST;

	AuTraceErr(err);
	return err;
}
/*
 * Validate the request and resolve the source and the destination branch.
 * - a directory is rejected with -EISDIR.
 * - bsrc is the top branch of the inode, or the branch named by the user
 *   (AUFS_MVDOWN_BRID_UPPER) which must hold the file.
 * - bsrc must not be the bottom branch.
 * - a read-only source branch is accepted only with AUFS_MVDOWN_ROUPPER,
 *   and never when the host inode itself is read-only or append-only.
 * - bdst is found by find_lower_writable(), or is the branch named by the
 *   user (AUFS_MVDOWN_BRID_LOWER).
 * - finally the idle/parent/intermediate/exist checks are run.
 * Returns zero when the move-down may proceed, otherwise a negative errno;
 * a->mvd_errno carries the aufs specific reason where one is set.
 */
static int au_mvd_args(const unsigned char dmsg, struct au_mvd_args *a)
{
	int err;
	struct au_branch *br;

	err = -EISDIR;
	if (unlikely(S_ISDIR(a->inode->i_mode)))
		goto out;

	err = -EINVAL;
	if (!(a->mvdown.flags & AUFS_MVDOWN_BRID_UPPER))
		a->mvd_bsrc = au_ibtop(a->inode);
	else {
		a->mvd_bsrc = au_br_index(a->sb, a->mvd_src_brid);
		if (unlikely(a->mvd_bsrc < 0
			     || (a->mvd_bsrc < au_dbtop(a->dentry)
				 || au_dbbot(a->dentry) < a->mvd_bsrc
				 || !au_h_dptr(a->dentry, a->mvd_bsrc))
			     || (a->mvd_bsrc < au_ibtop(a->inode)
				 || au_ibbot(a->inode) < a->mvd_bsrc
				 || !au_h_iptr(a->inode, a->mvd_bsrc)))) {
			a->mvd_errno = EAU_MVDOWN_NOUPPER;
			AU_MVD_PR(dmsg, "no upper\n");
			goto out;
		}
	}
	if (unlikely(a->mvd_bsrc == au_sbbot(a->sb))) {
		a->mvd_errno = EAU_MVDOWN_BOTTOM;
		AU_MVD_PR(dmsg, "on the bottom\n");
		goto out;
	}
	a->mvd_h_src_inode = au_h_iptr(a->inode, a->mvd_bsrc);
	br = au_sbr(a->sb, a->mvd_bsrc);
	err = au_br_rdonly(br);
	if (!(a->mvdown.flags & AUFS_MVDOWN_ROUPPER)) {
		if (unlikely(err))
			goto out;
	} else if (!(vfsub_native_ro(a->mvd_h_src_inode)
		     || IS_APPEND(a->mvd_h_src_inode))) {
		if (err)
			a->mvdown.flags |= AUFS_MVDOWN_ROUPPER_R;
		/* go on */
	} else {
		/*
		 * the host inode is natively read-only or append-only.
		 * err is still zero when the branch itself is writable, and
		 * leaving it so would report success before bdst is resolved
		 * or any of the checks below has run.
		 */
		if (!err)
			err = -EPERM;
		goto out;
	}

	err = -EINVAL;
	if (!(a->mvdown.flags & AUFS_MVDOWN_BRID_LOWER)) {
		a->mvd_bdst = find_lower_writable(a);
		if (unlikely(a->mvd_bdst < 0)) {
			a->mvd_errno = EAU_MVDOWN_BOTTOM;
			AU_MVD_PR(dmsg, "no writable lower branch\n");
			goto out;
		}
	} else {
		a->mvd_bdst = au_br_index(a->sb, a->mvd_dst_brid);
		if (unlikely(a->mvd_bdst < 0
			     || au_sbbot(a->sb) < a->mvd_bdst)) {
			a->mvd_errno = EAU_MVDOWN_NOLOWERBR;
			AU_MVD_PR(dmsg, "no lower brid\n");
			goto out;
		}
	}

	err = au_mvd_args_busy(dmsg, a);
	if (!err)
		err = au_mvd_args_parent(dmsg, a);
	if (!err)
		err = au_mvd_args_intermediate(dmsg, a);
	if (!err)
		err = au_mvd_args_exist(dmsg, a);
	if (!err)
		AuDbg("b%d, b%d\n", a->mvd_bsrc, a->mvd_bdst);

out:
	AuTraceErr(err);
	return err;
}
/*
 * Entry point of the move-down request.
 * @dentry: the aufs dentry to move down
 * @uarg: the user argument; read on entry and written back on exit
 *
 * Requires CAP_SYS_ADMIN.  Takes the parent dir lock, the inode lock, the
 * aufs read lock and the parent's dentry-info write lock, validates the
 * request with au_mvd_args(), performs it with au_do_mvdown() and
 * refreshes the attributes of the dir and the inode.
 * Returns zero or a negative errno; -EFAULT when the user argument cannot
 * be read or written.
 */
int au_mvdown(struct dentry *dentry, struct aufs_mvdown __user *uarg)
{
int err, e;
unsigned char dmsg;
struct au_mvd_args *args;
struct inode *inode;
inode = d_inode(dentry);
err = -EPERM;
if (unlikely(!capable(CAP_SYS_ADMIN)))
goto out;
err = -ENOMEM;
args = kmalloc(sizeof(*args), GFP_NOFS);
if (unlikely(!args))
goto out;
err = copy_from_user(&args->mvdown, uarg, sizeof(args->mvdown));
if (!err)
/* VERIFY_WRITE */
err = !access_ok(uarg, sizeof(*uarg));
if (unlikely(err)) {
err = -EFAULT;
AuTraceErr(err);
goto out_free;
}
AuDbg("flags 0x%x\n", args->mvdown.flags);
/* these two flags and au_errno are outputs: clear whatever the user passed */
args->mvdown.flags &= ~(AUFS_MVDOWN_ROLOWER_R | AUFS_MVDOWN_ROUPPER_R);
args->mvdown.au_errno = 0;
args->dentry = dentry;
args->inode = inode;
args->sb = dentry->d_sb;
err = -ENOENT;
dmsg = !!(args->mvdown.flags & AUFS_MVDOWN_DMSG);
args->parent = dget_parent(dentry);
args->dir = d_inode(args->parent);
inode_lock_nested(args->dir, I_MUTEX_PARENT);
dput(args->parent);
/* the dentry may have been moved before the dir lock was taken */
if (unlikely(args->parent != dentry->d_parent)) {
AU_MVD_PR(dmsg, "parent dir is moved\n");
goto out_dir;
}
inode_lock_nested(inode, I_MUTEX_CHILD);
err = aufs_read_lock(dentry, AuLock_DW | AuLock_FLUSH | AuLock_NOPLMW);
if (unlikely(err))
goto out_inode;
di_write_lock_parent(args->parent);
err = au_mvd_args(dmsg, args);
if (unlikely(err))
goto out_parent;
err = au_do_mvdown(dmsg, args);
if (unlikely(err))
goto out_parent;
au_cpup_attr_timesizes(args->dir);
au_cpup_attr_timesizes(inode);
if (!(args->mvdown.flags & AUFS_MVDOWN_KUPPER))
au_cpup_igen(inode, au_h_iptr(inode, args->mvd_bdst));
/* au_digen_dec(dentry); */
out_parent:
di_write_unlock(args->parent);
aufs_read_unlock(dentry, AuLock_DW);
out_inode:
inode_unlock(inode);
out_dir:
inode_unlock(args->dir);
out_free:
/* the result is written back on success and on failure alike */
e = copy_to_user(uarg, &args->mvdown, sizeof(args->mvdown));
if (unlikely(e))
err = -EFAULT;
au_kfree_rcu(args);
out:
AuTraceErr(err);
return err;
}

1880
fs/aufs/opts.c Normal file

File diff suppressed because it is too large Load Diff

225
fs/aufs/opts.h Normal file
View File

@ -0,0 +1,225 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2005-2020 Junjiro R. Okajima
*
* This program, aufs is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* mount options/flags
*/
#ifndef __AUFS_OPTS_H__
#define __AUFS_OPTS_H__
#ifdef __KERNEL__
#include <linux/path.h>
struct file;
/* ---------------------------------------------------------------------- */
/*
 * mount flags
 * one bit per option; (1 << 8) is not assigned in this list.
 */
#define AuOpt_XINO 1 /* external inode number bitmap
and translation table */
#define AuOpt_TRUNC_XINO (1 << 1) /* truncate xino files */
#define AuOpt_UDBA_NONE (1 << 2) /* users direct branch access */
#define AuOpt_UDBA_REVAL (1 << 3)
#define AuOpt_UDBA_HNOTIFY (1 << 4)
#define AuOpt_SHWH (1 << 5) /* show whiteout */
#define AuOpt_PLINK (1 << 6) /* pseudo-link */
#define AuOpt_DIRPERM1 (1 << 7) /* ignore the lower dir's perm
bits */
#define AuOpt_ALWAYS_DIROPQ (1 << 9) /* policy to creating diropq */
#define AuOpt_SUM (1 << 10) /* summation for statfs(2) */
#define AuOpt_SUM_W (1 << 11) /* unimplemented */
#define AuOpt_WARN_PERM (1 << 12) /* warn when add-branch */
#define AuOpt_VERBOSE (1 << 13) /* print the cause of error */
#define AuOpt_DIO (1 << 14) /* direct io */
#define AuOpt_DIRREN (1 << 15) /* directory rename */
/*
 * a flag whose feature is not configured is redefined as zero, so that
 * testing it is always false and setting it is a no-op.
 */
#ifndef CONFIG_AUFS_HNOTIFY
#undef AuOpt_UDBA_HNOTIFY
#define AuOpt_UDBA_HNOTIFY 0
#endif
#ifndef CONFIG_AUFS_DIRREN
#undef AuOpt_DIRREN
#define AuOpt_DIRREN 0
#endif
#ifndef CONFIG_AUFS_SHWH
#undef AuOpt_SHWH
#define AuOpt_SHWH 0
#endif
/* the default set of mount flags */
#define AuOpt_Def (AuOpt_XINO \
| AuOpt_UDBA_REVAL \
| AuOpt_PLINK \
/* | AuOpt_DIRPERM1 */ \
| AuOpt_WARN_PERM)
/* all UDBA bits; au_opt_set_udba() clears this mask before setting one */
#define AuOptMask_UDBA (AuOpt_UDBA_NONE \
| AuOpt_UDBA_REVAL \
| AuOpt_UDBA_HNOTIFY)
/*
 * Helpers to test and modify a mount-flags word.  @name is the flag name
 * without its AuOpt_ prefix, e.g. au_opt_test(mntflags, PLINK).
 * @flags is parenthesized in every helper so that an expression argument
 * is handled as a whole.
 */
/* non-zero when the flag is set in @flags */
#define au_opt_test(flags, name)	((flags) & AuOpt_##name)
/* set a flag; the UDBA flags are refused at build time, see below */
#define au_opt_set(flags, name) do { \
	BUILD_BUG_ON(AuOpt_##name & AuOptMask_UDBA); \
	((flags) |= AuOpt_##name); \
} while (0)
/* select one UDBA flag after clearing all of them */
#define au_opt_set_udba(flags, name) do { \
	(flags) &= ~AuOptMask_UDBA; \
	((flags) |= AuOpt_##name); \
} while (0)
/* clear a flag */
#define au_opt_clr(flags, name) do { \
	((flags) &= ~AuOpt_##name); \
} while (0)
/*
 * Return @mntflags as is when CONFIG_PROC_FS is set, otherwise with the
 * AuOpt_PLINK bit cleared.
 */
static inline unsigned int au_opts_plink(unsigned int mntflags)
{
	unsigned int flags = mntflags;

#ifndef CONFIG_PROC_FS
	flags &= ~AuOpt_PLINK;
#endif
	return flags;
}
/* ---------------------------------------------------------------------- */
/* policies to select one among multiple writable branches */
/* the policy for creating a new entry; the default is top-down-parent */
enum {
AuWbrCreate_TDP, /* top down parent */
AuWbrCreate_RR, /* round robin */
AuWbrCreate_MFS, /* most free space */
AuWbrCreate_MFSV, /* mfs with seconds */
AuWbrCreate_MFSRR, /* mfs then rr */
AuWbrCreate_MFSRRV, /* mfs then rr with seconds */
AuWbrCreate_TDMFS, /* top down regardless parent and mfs */
AuWbrCreate_TDMFSV, /* top down regardless parent and mfs */
AuWbrCreate_PMFS, /* parent and mfs */
AuWbrCreate_PMFSV, /* parent and mfs with seconds */
AuWbrCreate_PMFSRR, /* parent, mfs and round-robin */
AuWbrCreate_PMFSRRV, /* plus seconds */
AuWbrCreate_Def = AuWbrCreate_TDP
};
/* the policy for copy-up; the default is top-down-parent */
enum {
AuWbrCopyup_TDP, /* top down parent */
AuWbrCopyup_BUP, /* bottom up parent */
AuWbrCopyup_BU, /* bottom up */
AuWbrCopyup_Def = AuWbrCopyup_TDP
};
/* ---------------------------------------------------------------------- */
/*
 * per-option payloads.  NOTE(review): presumably filled by the option
 * parser in opts.c, which is not visible here -- confirm.
 */
/* add a branch */
struct au_opt_add {
aufs_bindex_t bindex;
char *pathname;
int perm;
struct path path;
};
/* delete a branch */
struct au_opt_del {
char *pathname;
struct path h_path;
};
/* modify a branch */
struct au_opt_mod {
char *path;
int perm;
struct dentry *h_root;
};
/* xino file */
struct au_opt_xino {
char *path;
struct file *file;
};
/* truncate the xino of one branch */
struct au_opt_xino_itrunc {
aufs_bindex_t bindex;
};
/* the create policy (an AuWbrCreate_* value) and its parameters */
struct au_opt_wbr_create {
int wbr_create;
int mfs_second;
unsigned long long mfsrr_watermark;
};
/* a single option: @type selects which member of the union is valid */
struct au_opt {
int type;
union {
struct au_opt_xino xino;
struct au_opt_xino_itrunc xino_itrunc;
struct au_opt_add add;
struct au_opt_del del;
struct au_opt_mod mod;
int dirwh;
int rdcache;
unsigned int rdblk;
unsigned int rdhash;
int udba;
struct au_opt_wbr_create wbr_create;
int wbr_copyup;
unsigned int fhsm_second;
};
};
/* opts flags: bits for the 'flags' member of struct au_opts */
#define AuOpts_REMOUNT 1
#define AuOpts_REFRESH (1 << 1)
#define AuOpts_TRUNC_XIB (1 << 2)
#define AuOpts_REFRESH_DYAOP (1 << 3)
#define AuOpts_REFRESH_IDOP (1 << 4)
#define AuOpts_DR_FLUSHED (1 << 5)
/* test/set/clear helpers; @name is given without the AuOpts_ prefix */
#define au_ftest_opts(flags, name) ((flags) & AuOpts_##name)
#define au_fset_opts(flags, name) \
do { (flags) |= AuOpts_##name; } while (0)
#define au_fclr_opts(flags, name) \
do { (flags) &= ~AuOpts_##name; } while (0)
/* without the dirren feature the flag becomes zero, i.e. never set */
#ifndef CONFIG_AUFS_DIRREN
#undef AuOpts_DR_FLUSHED
#define AuOpts_DR_FLUSHED 0
#endif
/*
 * a set of options.
 * NOTE(review): @opt presumably points to an array of @max_opt entries
 * -- confirm against opts.c.
 */
struct au_opts {
struct au_opt *opt;
int max_opt;
unsigned int given_udba;
unsigned int flags; /* AuOpts_* bits */
unsigned long sb_flags;
};
/* ---------------------------------------------------------------------- */
/* opts.c */
/* converters from an internal value to its option string */
void au_optstr_br_perm(au_br_perm_str_t *str, int perm);
const char *au_optstr_udba(int udba);
const char *au_optstr_wbr_copyup(int wbr_copyup);
const char *au_optstr_wbr_create(int wbr_create);
/* NOTE(review): presumably releases what au_opts_parse() acquired -- confirm */
void au_opts_free(struct au_opts *opts);
struct super_block;
/* parse, verify and apply the options at mount or remount time */
int au_opts_parse(struct super_block *sb, char *str, struct au_opts *opts);
int au_opts_verify(struct super_block *sb, unsigned long sb_flags,
unsigned int pending);
int au_opts_mount(struct super_block *sb, struct au_opts *opts);
int au_opts_remount(struct super_block *sb, struct au_opts *opts);
/* the UDBA setting of @sb; used as an argument of au_pin() in mvdown.c */
unsigned int au_opt_udba(struct super_block *sb);
#endif /* __KERNEL__ */
#endif /* __AUFS_OPTS_H__ */

516
fs/aufs/plink.c Normal file
View File

@ -0,0 +1,516 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2005-2020 Junjiro R. Okajima
*
* This program, aufs is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* pseudo-link
*/
#include "aufs.h"
/*
* the pseudo-link maintenance mode.
* while a user process maintains the pseudo-links,
* adding a new plink and branch manipulation are prohibited.
*
* Flags
* NOPLM:
* For entry functions which will handle plink, and i_mutex is already held
* in VFS.
* They cannot wait and should return an error at once.
* Callers have to check the error.
* NOPLMW:
* For entry functions which will handle plink, but i_mutex is not held
* in VFS.
* They can wait for the plink maintenance mode to finish.
*
* They behave like F_SETLK and F_SETLKW.
* If the caller never handles plink, then both flags are unnecessary.
*
* Returns zero when the caller may go on, or -EAGAIN (NOPLM only) when
* another process is in the maintenance mode.  The process which entered
* the mode and all of its descendants are always allowed.
*/
int au_plink_maint(struct super_block *sb, int flags)
{
int err;
pid_t pid, ppid;
struct task_struct *parent, *prev;
struct au_sbinfo *sbi;
SiMustAnyLock(sb);
err = 0;
if (!au_opt_test(au_mntflags(sb), PLINK))
goto out;
sbi = au_sbi(sb);
pid = sbi->si_plink_maint_pid;
/* nobody is in the maintenance mode, or it is me */
if (!pid || pid == current->pid)
goto out;
/* todo: it highly depends upon /sbin/mount.aufs */
/*
 * walk up the real_parent chain until a task is its own parent;
 * an ancestor whose pid is the maintainer's means we are its descendant.
 */
prev = NULL;
parent = current;
ppid = 0;
rcu_read_lock();
while (1) {
parent = rcu_dereference(parent->real_parent);
if (parent == prev)
break;
ppid = task_pid_vnr(parent);
if (pid == ppid) {
rcu_read_unlock();
goto out;
}
prev = parent;
}
rcu_read_unlock();
if (au_ftest_lock(flags, NOPLMW)) {
/* if there is no i_mutex lock in VFS, we don't need to wait */
/* AuDebugOn(!lockdep_depth(current)); */
/* drop the superblock lock while sleeping, and re-check after re-locking */
while (sbi->si_plink_maint_pid) {
si_read_unlock(sb);
/* gave up wake_up_bit() */
wait_event(sbi->si_plink_wq, !sbi->si_plink_maint_pid);
if (au_ftest_lock(flags, FLUSH))
au_nwt_flush(&sbi->si_nowait);
si_noflush_read_lock(sb);
}
} else if (au_ftest_lock(flags, NOPLM)) {
AuDbg("ppid %d, pid %d\n", ppid, pid);
err = -EAGAIN;
}
out:
return err;
}
/*
 * leave the plink maintenance mode: clear the maintainer's pid under
 * si_plink_maint_lock and wake up everybody sleeping on si_plink_wq.
 */
void au_plink_maint_leave(struct au_sbinfo *sbinfo)
{
spin_lock(&sbinfo->si_plink_maint_lock);
sbinfo->si_plink_maint_pid = 0;
spin_unlock(&sbinfo->si_plink_maint_lock);
wake_up_all(&sbinfo->si_plink_wq);
}
/*
 * enter the plink maintenance mode.
 * With the PLINK option on, the current process becomes the maintainer
 * unless somebody else already is, in which case -EBUSY is returned.
 * Without the option nothing is recorded and zero is returned.
 */
int au_plink_maint_enter(struct super_block *sb)
{
	int err = 0;
	struct au_sbinfo *sbinfo = au_sbi(sb);

	/* make sure i am the only one in this fs */
	si_write_lock(sb, AuLock_FLUSH);
	if (!au_opt_test(au_mntflags(sb), PLINK))
		goto unlock;

	spin_lock(&sbinfo->si_plink_maint_lock);
	if (sbinfo->si_plink_maint_pid)
		err = -EBUSY;
	else
		sbinfo->si_plink_maint_pid = current->pid;
	spin_unlock(&sbinfo->si_plink_maint_lock);

unlock:
	si_write_unlock(sb);
	return err;
}
/* ---------------------------------------------------------------------- */
#ifdef CONFIG_AUFS_DEBUG
/*
 * debug only: print the inode number of every pseudo-linked inode,
 * walking each hash bucket under its own lock.
 */
void au_plink_list(struct super_block *sb)
{
int i;
struct au_sbinfo *sbinfo;
struct hlist_bl_head *hbl;
struct hlist_bl_node *pos;
struct au_icntnr *icntnr;
SiMustAnyLock(sb);
sbinfo = au_sbi(sb);
AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
AuDebugOn(au_plink_maint(sb, AuLock_NOPLM));
for (i = 0; i < AuPlink_NHASH; i++) {
hbl = sbinfo->si_plink + i;
hlist_bl_lock(hbl);
hlist_bl_for_each_entry(icntnr, pos, hbl, plink)
AuDbg("%lu\n", icntnr->vfs_inode.i_ino);
hlist_bl_unlock(hbl);
}
}
#endif
/* is the inode pseudo-linked? */
int au_plink_test(struct inode *inode)
{
int found, i;
struct au_sbinfo *sbinfo;
struct hlist_bl_head *hbl;
struct hlist_bl_node *pos;
struct au_icntnr *icntnr;
sbinfo = au_sbi(inode->i_sb);
AuRwMustAnyLock(&sbinfo->si_rwsem);
AuDebugOn(!au_opt_test(au_mntflags(inode->i_sb), PLINK));
AuDebugOn(au_plink_maint(inode->i_sb, AuLock_NOPLM));
found = 0;
i = au_plink_hash(inode->i_ino);
hbl = sbinfo->si_plink + i;
hlist_bl_lock(hbl);
hlist_bl_for_each_entry(icntnr, pos, hbl, plink)
if (&icntnr->vfs_inode == inode) {
found = 1;
break;
}
hlist_bl_unlock(hbl);
return found;
}
/* ---------------------------------------------------------------------- */
/*
 * generate a name for plink.
 * the file will be stored under AUFS_WH_PLINKDIR.
 * The name is "<aufs inode number>.<host inode number on @bindex>".
 * Returns what snprintf() returns.
 */
/* 20 is max digits length of ulong 64 */
#define PLINK_NAME_LEN	((20 + 1) * 2)
static int plink_name(char *name, int len, struct inode *inode,
		      aufs_bindex_t bindex)
{
	struct inode *h_inode = au_h_iptr(inode, bindex);

	return snprintf(name, len, "%lu.%lu", inode->i_ino, h_inode->i_ino);
}
/* arguments of au_do_plink_lkup() when it is run via au_wkq_wait() */
struct au_do_plink_lkup_args {
struct dentry **errp; /* where the resulting dentry (or error) is stored */
struct qstr *tgtname;
struct dentry *h_parent;
struct au_branch *br;
};
/*
 * look up @tgtname under @h_parent, holding the parent's inode lock in
 * shared mode.  Returns what vfsub_lkup_one() returns.
 * @br is not used in the body.
 */
static struct dentry *au_do_plink_lkup(struct qstr *tgtname,
struct dentry *h_parent,
struct au_branch *br)
{
struct dentry *h_dentry;
struct inode *h_inode;
h_inode = d_inode(h_parent);
inode_lock_shared_nested(h_inode, AuLsc_I_CHILD2);
h_dentry = vfsub_lkup_one(tgtname, h_parent);
inode_unlock_shared(h_inode);
return h_dentry;
}
/* the au_wkq_wait() callback: unpack @args and store the result via errp */
static void au_call_do_plink_lkup(void *args)
{
struct au_do_plink_lkup_args *a = args;
*a->errp = au_do_plink_lkup(a->tgtname, a->h_parent, a->br);
}
/*
 * lookup the plink-ed @inode under the branch at @bindex.
 * The lookup is done directly when the current fsuid is the global root,
 * otherwise it is handed to au_wkq_wait().
 * Returns the dentry from the lookup, or ERR_PTR() when au_wkq_wait()
 * itself fails.
 */
struct dentry *au_plink_lkup(struct inode *inode, aufs_bindex_t bindex)
{
	struct dentry *h_dentry, *h_parent;
	struct au_branch *br;
	int wkq_err;
	char name[PLINK_NAME_LEN];
	struct qstr tgtname = QSTR_INIT(name, 0);

	AuDebugOn(au_plink_maint(inode->i_sb, AuLock_NOPLM));

	br = au_sbr(inode->i_sb, bindex);
	h_parent = br->br_wbr->wbr_plink;
	tgtname.len = plink_name(name, sizeof(name), inode, bindex);

	if (uid_eq(current_fsuid(), GLOBAL_ROOT_UID)) {
		h_dentry = au_do_plink_lkup(&tgtname, h_parent, br);
	} else {
		struct au_do_plink_lkup_args args = {
			.errp		= &h_dentry,
			.tgtname	= &tgtname,
			.h_parent	= h_parent,
			.br		= br
		};

		wkq_err = au_wkq_wait(au_call_do_plink_lkup, &args);
		if (unlikely(wkq_err))
			h_dentry = ERR_PTR(wkq_err);
	}

	return h_dentry;
}
/*
 * create a pseudo-link: make @tgt under @h_parent a hard link to
 * @h_dentry on the branch @br.
 * An existing entry of that name which refers to another inode is
 * unlinked first and the lookup is repeated.  An existing entry which
 * already refers to the same inode is left as it is.
 * The parent's inode lock is held for the whole operation.
 * Returns zero or the error from the lookup, the unlink or the link.
 */
static int do_whplink(struct qstr *tgt, struct dentry *h_parent,
struct dentry *h_dentry, struct au_branch *br)
{
int err;
struct path h_path = {
.mnt = au_br_mnt(br)
};
struct inode *h_dir, *delegated;
h_dir = d_inode(h_parent);
inode_lock_nested(h_dir, AuLsc_I_CHILD2);
again:
h_path.dentry = vfsub_lkup_one(tgt, h_parent);
err = PTR_ERR(h_path.dentry);
if (IS_ERR(h_path.dentry))
goto out;
err = 0;
/* wh.plink dir is not monitored */
/* todo: is it really safe? */
/* a stale entry of the same name: remove it and look up again */
if (d_is_positive(h_path.dentry)
&& d_inode(h_path.dentry) != d_inode(h_dentry)) {
delegated = NULL;
err = vfsub_unlink(h_dir, &h_path, &delegated, /*force*/0);
if (unlikely(err == -EWOULDBLOCK)) {
pr_warn("cannot retry for NFSv4 delegation"
" for an internal unlink\n");
iput(delegated);
}
dput(h_path.dentry);
h_path.dentry = NULL;
if (!err)
goto again;
}
/* on the unlink error path h_path.dentry is NULL, and !err guards its use */
if (!err && d_is_negative(h_path.dentry)) {
delegated = NULL;
err = vfsub_link(h_dentry, h_dir, &h_path, &delegated);
if (unlikely(err == -EWOULDBLOCK)) {
pr_warn("cannot retry for NFSv4 delegation"
" for an internal link\n");
iput(delegated);
}
}
dput(h_path.dentry);
out:
inode_unlock(h_dir);
return err;
}
/* arguments handed via au_wkq_wait() to call_do_whplink() */
struct do_whplink_args {
int *errp; /* out: receives the value returned by do_whplink() */
struct qstr *tgt; /* name of the pseudo-link */
struct dentry *h_parent; /* directory which holds the pseudo-link */
struct dentry *h_dentry; /* the dentry to be linked */
struct au_branch *br; /* forwarded as is to do_whplink() */
};
/*
 * void * adapter around do_whplink(), handed to au_wkq_wait() by whplink().
 * @args is a struct do_whplink_args; the result is stored through its ->errp.
 */
static void call_do_whplink(void *args)
{
struct do_whplink_args *a = args;
*a->errp = do_whplink(a->tgt, a->h_parent, a->h_dentry, a->br);
}
/*
 * Create the pseudo-link of @inode for @h_dentry in the wbr_plink directory
 * of the branch at @bindex.  @br is handed to do_whplink() as is.
 * When the current fsuid is not root, do_whplink() is executed through
 * au_wkq_wait() and a failure of au_wkq_wait() itself is returned.
 * Returns 0 or a negative error code.
 */
static int whplink(struct dentry *h_dentry, struct inode *inode,
		   aufs_bindex_t bindex, struct au_branch *br)
{
	int err, wkq_err;
	struct dentry *h_parent;
	struct do_whplink_args args;
	char a[PLINK_NAME_LEN];
	struct qstr tgtname = QSTR_INIT(a, 0);

	h_parent = au_sbr(inode->i_sb, bindex)->br_wbr->wbr_plink;
	tgtname.len = plink_name(a, sizeof(a), inode, bindex);

	/* always superio. */
	if (uid_eq(current_fsuid(), GLOBAL_ROOT_UID))
		return do_whplink(&tgtname, h_parent, h_dentry, br);

	args.errp = &err;
	args.tgt = &tgtname;
	args.h_parent = h_parent;
	args.h_dentry = h_dentry;
	args.br = br;
	wkq_err = au_wkq_wait(call_do_whplink, &args);
	if (unlikely(wkq_err))
		err = wkq_err;

	return err;
}
/*
 * create a new pseudo-link for @h_dentry on @bindex.
 * the linked inode is held in aufs @inode.
 *
 * Nothing is done when au_plink_test() reports @inode as plink-ed already.
 * Otherwise @inode is grabbed and, unless it turns out to be on the hash list
 * already, added to the list selected by au_plink_hash(); then whplink()
 * creates the link on the branch.  When whplink() fails, the inode is removed
 * from the list again and the reference is dropped.
 * Errors are only reported by pr_warn(), nothing is returned.
 */
void au_plink_append(struct inode *inode, aufs_bindex_t bindex,
struct dentry *h_dentry)
{
struct super_block *sb;
struct au_sbinfo *sbinfo;
struct hlist_bl_head *hbl;
struct hlist_bl_node *pos;
struct au_icntnr *icntnr;
int found, err, cnt, i;
sb = inode->i_sb;
sbinfo = au_sbi(sb);
AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
AuDebugOn(au_plink_maint(sb, AuLock_NOPLM));
found = au_plink_test(inode);
if (found)
return;
i = au_plink_hash(inode->i_ino);
hbl = sbinfo->si_plink + i;
/* this reference is kept while the inode stays on the list */
au_igrab(inode);
hlist_bl_lock(hbl);
/* search again, this time under the list lock */
hlist_bl_for_each_entry(icntnr, pos, hbl, plink) {
if (&icntnr->vfs_inode == inode) {
found = 1;
break;
}
}
if (!found) {
icntnr = container_of(inode, struct au_icntnr, vfs_inode);
hlist_bl_add_head(&icntnr->plink, hbl);
}
hlist_bl_unlock(hbl);
if (!found) {
cnt = au_hbl_count(hbl);
#define msg "unexpectedly unbalanced or too many pseudo-links"
if (cnt > AUFS_PLINK_WARN)
AuWarn1(msg ", %d\n", cnt);
#undef msg
err = whplink(h_dentry, inode, bindex, au_sbr(sb, bindex));
if (unlikely(err)) {
pr_warn("err %d, damaged pseudo link.\n", err);
/* undo the list insertion and the au_igrab() above */
au_hbl_del(&icntnr->plink, hbl);
iput(&icntnr->vfs_inode);
}
} else
/* already on the list: drop the reference taken by au_igrab() above */
iput(&icntnr->vfs_inode);
}
/*
 * free all plinks
 *
 * Drops the inode reference of every entry on every plink hash list of @sb
 * and re-initializes the list heads.  When @verbose is set and any list is
 * not empty, a single warning is printed.
 * The caller has to hold the write lock of sbinfo.
 */
void au_plink_put(struct super_block *sb, int verbose)
{
	int i, warned;
	struct au_sbinfo *sbinfo;
	struct hlist_bl_head *hbl;
	struct hlist_bl_node *pos, *tmp;
	struct au_icntnr *icntnr;

	SiMustWriteLock(sb);

	sbinfo = au_sbi(sb);
	AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
	AuDebugOn(au_plink_maint(sb, AuLock_NOPLM));

	/* no spin_lock since sbinfo is write-locked */
	warned = 0;
	for (i = 0; i < AuPlink_NHASH; i++) {
		hbl = sbinfo->si_plink + i;
		if (!warned && verbose && !hlist_bl_empty(hbl)) {
			/* terminate the message as the other pr_warn()s do */
			pr_warn("pseudo-link is not flushed\n");
			warned = 1;
		}
		hlist_bl_for_each_entry_safe(icntnr, pos, tmp, hbl, plink)
			iput(&icntnr->vfs_inode);
		INIT_HLIST_BL_HEAD(hbl);
	}
}
/*
 * Free all plinks of @sb via au_plink_put() when the PLINK option is set,
 * taking aufs_write_lock() on the root dentry around it.
 * @verbose is handed to au_plink_put() as is.
 */
void au_plink_clean(struct super_block *sb, int verbose)
{
	struct dentry *root = sb->s_root;

	aufs_write_lock(root);
	if (au_opt_test(au_mntflags(sb), PLINK))
		au_plink_put(sb, verbose);
	aufs_write_unlock(root);
}
/*
 * Clear the branch inode of @inode which belongs to the branch @br_id (the
 * first one found between au_ibtop() and au_ibbot()).
 * Returns non-zero when the caller should drop the plink of @inode, i.e. when
 * @inode has no branch range at all, or when a branch inode was cleared and
 * no other one is left.  Returns zero otherwise.
 */
static int au_plink_do_half_refresh(struct inode *inode, aufs_bindex_t br_id)
{
	aufs_bindex_t first, last, idx;

	first = au_ibtop(inode);
	last = au_ibbot(inode);
	if (first < 0)
		return 1;

	/* find the branch inode on the specified branch */
	for (idx = first; idx <= last; idx++)
		if (au_h_iptr(inode, idx)
		    && au_ii_br_id(inode, idx) == br_id)
			break;
	if (idx > last)
		return 0;

	au_set_h_iptr(inode, idx, NULL, 0);

	/* is any other branch inode left? */
	for (idx = first; idx <= last; idx++)
		if (au_h_iptr(inode, idx))
			return 0;

	return 1;
}
/*
 * free the plinks on a branch specified by @br_id
 *
 * Every inode on the plink hash lists is handed to
 * au_plink_do_half_refresh() under its write lock; when that returns
 * non-zero, the inode is removed from the list and the reference which the
 * list had is dropped.
 * The caller has to hold the write lock of sbinfo.
 */
void au_plink_half_refresh(struct super_block *sb, aufs_bindex_t br_id)
{
	struct au_sbinfo *sbinfo;
	struct hlist_bl_head *bucket;
	struct hlist_bl_node *pos, *next;
	struct au_icntnr *icntnr;
	struct inode *inode;
	int i;

	SiMustWriteLock(sb);

	sbinfo = au_sbi(sb);
	AuDebugOn(!au_opt_test(au_mntflags(sb), PLINK));
	AuDebugOn(au_plink_maint(sb, AuLock_NOPLM));

	/* no bit_lock since sbinfo is write-locked */
	for (i = 0; i < AuPlink_NHASH; i++) {
		bucket = &sbinfo->si_plink[i];
		hlist_bl_for_each_entry_safe(icntnr, pos, next, bucket, plink) {
			/* a temporary reference while the inode is handled */
			inode = au_igrab(&icntnr->vfs_inode);
			ii_write_lock_child(inode);
			if (au_plink_do_half_refresh(inode, br_id)) {
				hlist_bl_del(&icntnr->plink);
				/* the reference the list had */
				iput(inode);
			}
			ii_write_unlock(inode);
			iput(inode);
		}
	}
}

51
fs/aufs/poll.c Normal file
View File

@ -0,0 +1,51 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2005-2020 Junjiro R. Okajima
*
* This program, aufs is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* poll operation
* There is only one filesystem which implements ->poll operation, currently.
*/
#include "aufs.h"
/*
 * ->poll for aufs: hand the request to the branch file obtained by
 * au_read_pre() and return the mask from vfs_poll().
 * When the branch file is not available, EPOLLERR is returned.
 */
__poll_t aufs_poll(struct file *file, struct poll_table_struct *pt)
{
	__poll_t mask;
	struct file *h_file;
	struct super_block *sb;

	sb = file->f_path.dentry->d_sb;
	si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);

	h_file = au_read_pre(file, /*keep_fi*/0, /*lsc*/0);
	if (!IS_ERR(h_file)) {
		mask = vfs_poll(h_file, pt);
		fput(h_file); /* instead of au_read_post() */
	} else {
		/* We should pretend an error happened. */
		mask = EPOLLERR /* | EPOLLIN | EPOLLOUT */;
		AuDbg("h_file %ld\n", PTR_ERR(h_file));
	}

	si_read_unlock(sb);
	if (mask & EPOLLERR)
		AuDbg("mask 0x%x\n", mask);
	return mask;
}

105
fs/aufs/posix_acl.c Normal file
View File

@ -0,0 +1,105 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2014-2020 Junjiro R. Okajima
*
* This program, aufs is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* posix acl operations
*/
#include <linux/fs.h>
#include "aufs.h"
/*
 * ->get_acl for aufs: get the acl of the topmost branch inode and reflect
 * the result in the acl cache of the aufs @inode.
 * Returns NULL when the superblock does not have SB_POSIXACL, an error
 * pointer made from au_busy_or_stale() when the topmost branch inode is
 * missing or its file type differs from @inode, or otherwise whatever
 * get_acl() returned.
 */
struct posix_acl *aufs_get_acl(struct inode *inode, int type)
{
	struct posix_acl *acl = NULL;
	struct super_block *sb = inode->i_sb;
	struct inode *h_inode;

	si_read_lock(sb, AuLock_FLUSH);
	ii_read_lock_child(inode);
	if (!(sb->s_flags & SB_POSIXACL))
		goto out;

	/* always topmost only */
	h_inode = au_h_iptr(inode, au_ibtop(inode));
	if (unlikely(!h_inode
		     || ((h_inode->i_mode & S_IFMT)
			 != (inode->i_mode & S_IFMT)))) {
		acl = ERR_PTR(au_busy_or_stale());
		goto out;
	}

	acl = get_acl(h_inode, type);
	if (!IS_ERR(acl))
		set_cached_acl(inode, type, acl);
	else
		forget_cached_acl(inode, type);

out:
	ii_read_unlock(inode);
	si_read_unlock(sb);

	AuTraceErrPtr(acl);
	return acl;
}
/*
 * ->set_acl for aufs: set @acl of @type through au_sxattr().
 * A dentry for @inode is necessary; the root dentry is used for the root
 * inode, otherwise an alias is searched.
 * Returns 0, -ENOENT when no dentry is found, or the negative value from
 * au_sxattr().
 */
int aufs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
{
	ssize_t ssz;
	struct dentry *dentry;
	struct au_sxattr arg = {
		.type = AU_ACL_SET,
		.u.acl_set = {
			.acl = acl,
			.type = type
		},
	};

	IMustLock(inode);

	if (inode->i_ino != AUFS_ROOT_INO) {
		dentry = d_find_alias(inode);
		if (!dentry)
			dentry = d_find_any_alias(inode);
		if (!dentry) {
			pr_warn("cannot handle this inode, "
				"please report to aufs-users ML\n");
			return -ENOENT;
		}
	} else
		dentry = dget(inode->i_sb->s_root);

	ssz = au_sxattr(dentry, inode, &arg);
	/* forget it even if it succeeds since the branch might set differently */
	forget_cached_acl(inode, type);
	dput(dentry);

	return ssz < 0 ? ssz : 0;
}

171
fs/aufs/procfs.c Normal file
View File

@ -0,0 +1,171 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2010-2020 Junjiro R. Okajima
*
* This program, aufs is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* procfs interfaces
*/
#include <linux/proc_fs.h>
#include "aufs.h"
/*
 * ->release of the plink maintenance file.
 * When a sbinfo was attached to @file (see au_procfs_plm_write_si()), leave
 * the plink maintenance mode and drop the kobject reference which was kept
 * for @file.  Always returns 0.
 */
static int au_procfs_plm_release(struct inode *inode, struct file *file)
{
struct au_sbinfo *sbinfo;
sbinfo = file->private_data;
if (sbinfo) {
au_plink_maint_leave(sbinfo);
kobject_put(&sbinfo->si_kobj);
}
return 0;
}
/*
 * Handle the "clean" command: free the plinks of the superblock attached to
 * @file.  Nothing is done when no sbinfo is attached.
 */
static void au_procfs_plm_write_clean(struct file *file)
{
struct au_sbinfo *sbinfo;
sbinfo = file->private_data;
if (sbinfo)
au_plink_clean(sbinfo->si_sb, /*verbose*/0);
}
static int au_procfs_plm_write_si(struct file *file, unsigned long id)
{
int err;
struct super_block *sb;
struct au_sbinfo *sbinfo;
struct hlist_bl_node *pos;
err = -EBUSY;
if (unlikely(file->private_data))
goto out;
sb = NULL;
/* don't use au_sbilist_lock() here */
hlist_bl_lock(&au_sbilist);
hlist_bl_for_each_entry(sbinfo, pos, &au_sbilist, si_list)
if (id == sysaufs_si_id(sbinfo)) {
if (kobject_get_unless_zero(&sbinfo->si_kobj))
sb = sbinfo->si_sb;
break;
}
hlist_bl_unlock(&au_sbilist);
err = -EINVAL;
if (unlikely(!sb))
goto out;
err = au_plink_maint_enter(sb);
if (!err)
/* keep kobject_get() */
file->private_data = sbinfo;
else
kobject_put(&sbinfo->si_kobj);
out:
return err;
}
/*
 * Accept a valid "si=xxxx" only.
 * Once it is accepted successfully, accept "clean" too.
 *
 * ->write of the plink maintenance file.  Requires CAP_SYS_ADMIN.
 * Returns @count on success, -EACCES without the capability, -EFAULT when
 * the user buffer cannot be read, -EINVAL for a too long or malformed
 * request, or the error from kstrtoul()/au_procfs_plm_write_si().
 */
static ssize_t au_procfs_plm_write(struct file *file, const char __user *ubuf,
				   size_t count, loff_t *ppos)
{
	ssize_t err;
	unsigned long id;
	/*
	 * "si=" + the hex digits of an unsigned long + the last newline (it is
	 * allowed) + the terminating NUL which is stored below.
	 */
	char buf[3 + sizeof(unsigned long) * 2 + 1 + 1];

	err = -EACCES;
	if (unlikely(!capable(CAP_SYS_ADMIN)))
		goto out;

	/* buf[count] is written below, so count must be less than the size */
	err = -EINVAL;
	if (unlikely(count >= sizeof(buf)))
		goto out;

	err = copy_from_user(buf, ubuf, count);
	if (unlikely(err)) {
		err = -EFAULT;
		goto out;
	}
	buf[count] = 0;

	err = -EINVAL;
	if (!strcmp("clean", buf)) {
		au_procfs_plm_write_clean(file);
		goto out_success;
	} else if (unlikely(strncmp("si=", buf, 3)))
		goto out;

	err = kstrtoul(buf + 3, 16, &id);
	if (unlikely(err))
		goto out;

	err = au_procfs_plm_write_si(file, id);
	if (unlikely(err))
		goto out;

out_success:
	err = count; /* success */
out:
	return err;
}
/* file operations of the plink maintenance file: it is write-only */
static const struct file_operations au_procfs_plm_fop = {
.write = au_procfs_plm_write,
.release = au_procfs_plm_release,
.owner = THIS_MODULE
};
/* ---------------------------------------------------------------------- */
/* the procfs directory created by au_procfs_init() */
static struct proc_dir_entry *au_procfs_dir;
/* remove the plink maintenance file and then its directory */
void au_procfs_fin(void)
{
remove_proc_entry(AUFS_PLINK_MAINT_NAME, au_procfs_dir);
remove_proc_entry(AUFS_PLINK_MAINT_DIR, NULL);
}
/*
 * Create the procfs directory and the plink maintenance file in it.
 * Returns 0 or -ENOMEM; the directory is removed again when the file cannot
 * be created.
 */
int __init au_procfs_init(void)
{
	struct proc_dir_entry *entry;

	au_procfs_dir = proc_mkdir(AUFS_PLINK_MAINT_DIR, NULL);
	if (unlikely(!au_procfs_dir))
		return -ENOMEM;

	entry = proc_create(AUFS_PLINK_MAINT_NAME, S_IFREG | 0200,
			    au_procfs_dir, &au_procfs_plm_fop);
	if (unlikely(!entry)) {
		remove_proc_entry(AUFS_PLINK_MAINT_DIR, NULL);
		return -ENOMEM;
	}

	return 0; /* success */
}

384
fs/aufs/rdu.c Normal file
View File

@ -0,0 +1,384 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2005-2020 Junjiro R. Okajima
*
* This program, aufs is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* readdir in userspace.
*/
#include <linux/compat.h>
#include <linux/fs_stack.h>
#include <linux/security.h>
#include <linux/string.h>
#include "aufs.h"
/*
 * bits for struct aufs_rdu.flags
 * NOTE(review): in this file they are applied to rdu->cookie.flags.
 */
#define AuRdu_CALLED 1 /* set by au_rdu_fill() whenever it is called */
#define AuRdu_CONT (1 << 1) /* set by au_rdu() after a call without error */
#define AuRdu_FULL (1 << 2) /* set by au_rdu_fill() when the entry does not fit */
/* test, set and clear the bit AuRdu_<name> in flags */
#define au_ftest_rdu(flags, name) ((flags) & AuRdu_##name)
#define au_fset_rdu(flags, name) \
do { (flags) |= AuRdu_##name; } while (0)
#define au_fclr_rdu(flags, name) \
do { (flags) &= ~AuRdu_##name; } while (0)
/* the context of a single au_rdu() call, shared with the actor au_rdu_fill() */
struct au_rdu_arg {
struct dir_context ctx; /* au_rdu_fill() gets this struct back by container_of() */
struct aufs_rdu *rdu; /* the request being served */
union au_rdu_ent_ul ent; /* where the next entry is stored in the user buffer */
unsigned long end; /* the address where the user buffer ends */
struct super_block *sb; /* the aufs superblock, set by au_rdu() */
int err; /* reset by au_rdu_fill(), read by au_rdu_do() */
};
/*
 * The dir_context actor for au_rdu(): store one entry (the header struct
 * au_rdu_ent followed by the NUL-terminated name) into the user buffer at
 * arg->ent and advance arg->ent by au_rdu_len(@nlen).
 * When the entry does not fit, the FULL flag and rdu->full are set and
 * rdu->tail records the current position.
 * Returns 0 when the entry was stored, -EFAULT when it did not fit or when
 * writing to userspace failed.
 */
static int au_rdu_fill(struct dir_context *ctx, const char *name, int nlen,
		       loff_t offset, u64 h_ino, unsigned int d_type)
{
	int err, len;
	struct au_rdu_arg *arg = container_of(ctx, struct au_rdu_arg, ctx);
	struct aufs_rdu *rdu = arg->rdu;
	struct au_rdu_ent ent;

	err = 0;
	arg->err = 0;
	au_fset_rdu(rdu->cookie.flags, CALLED);
	len = au_rdu_len(nlen);
	if (arg->ent.ul + len < arg->end) {
		/*
		 * The whole header is copied to userspace below.  Clear the
		 * members which are not set here (->wh) and any padding
		 * instead of exposing the contents of the kernel stack.
		 */
		memset(&ent, 0, sizeof(ent));
		ent.ino = h_ino;
		ent.bindex = rdu->cookie.bindex;
		ent.type = d_type;
		ent.nlen = nlen;
		if (unlikely(nlen > AUFS_MAX_NAMELEN))
			ent.type = DT_UNKNOWN;

		/* unnecessary to support mmap_sem since this is a dir */
		err = -EFAULT;
		if (copy_to_user(arg->ent.e, &ent, sizeof(ent)))
			goto out;
		if (copy_to_user(arg->ent.e->name, name, nlen))
			goto out;
		/* the terminating NULL */
		if (__put_user(0, arg->ent.e->name + nlen))
			goto out;
		err = 0;
		/* AuDbg("%p, %.*s\n", arg->ent.p, nlen, name); */
		arg->ent.ul += len;
		rdu->rent++;
	} else {
		err = -EFAULT;
		au_fset_rdu(rdu->cookie.flags, FULL);
		rdu->full = 1;
		rdu->tail = arg->ent;
	}

out:
	/* AuTraceErr(err); */
	return err;
}
/*
 * Read the entries of the branch dir @h_file, starting from cookie->h_pos.
 * vfsub_iterate_dir() is repeated as long as it succeeds, the actor was
 * called during the last pass, and the user buffer is not full.
 * cookie->h_pos is updated to the position where the iteration stopped.
 * Returns 0 or an error; when the seek does not land on cookie->h_pos, the
 * value returned by vfsub_llseek() is returned.
 */
static int au_rdu_do(struct file *h_file, struct au_rdu_arg *arg)
{
	int err;
	loff_t pos;
	struct au_rdu_cookie *cookie = &arg->rdu->cookie;

	/* we don't have to care (FMODE_32BITHASH | FMODE_64BITHASH) for ext4 */
	pos = vfsub_llseek(h_file, cookie->h_pos, SEEK_SET);
	err = pos;
	if (unlikely(pos != cookie->h_pos))
		goto out;

	for (;;) {
		arg->err = 0;
		au_fclr_rdu(cookie->flags, CALLED);
		/* smp_mb(); */
		err = vfsub_iterate_dir(h_file, &arg->ctx);
		if (err >= 0)
			err = arg->err;
		if (err
		    || !au_ftest_rdu(cookie->flags, CALLED)
		    || au_ftest_rdu(cookie->flags, FULL))
			break;
	}
	cookie->h_pos = h_file->f_pos;

out:
	AuTraceErr(err);
	return err;
}
/*
 * Serve AUFS_CTL_RDU: fill the user buffer described by @rdu (->ent, ->sz)
 * with the entries of the branch directories of @file.
 *
 * The position is carried over between calls by rdu->cookie (the branch
 * index, the position in the branch dir, the flags and the generation).
 * Returns 0 or a negative error code:
 * -EFAULT when the user buffer is not accessible,
 * -ENOTDIR when @file has neither ->iterate nor ->iterate_shared,
 * -EAGAIN when this is a continued call and the generation of @file differs
 * from the one recorded in the cookie,
 * or the error from the other functions called here.
 */
static int au_rdu(struct file *file, struct aufs_rdu *rdu)
{
int err;
aufs_bindex_t bbot;
struct au_rdu_arg arg = {
.ctx = {
.actor = au_rdu_fill
}
};
struct dentry *dentry;
struct inode *inode;
struct file *h_file;
struct au_rdu_cookie *cookie = &rdu->cookie;
/* VERIFY_WRITE */
err = !access_ok(rdu->ent.e, rdu->sz);
if (unlikely(err)) {
err = -EFAULT;
AuTraceErr(err);
goto out;
}
/* reset the results and set the range of the user buffer */
rdu->rent = 0;
rdu->tail = rdu->ent;
rdu->full = 0;
arg.rdu = rdu;
arg.ent = rdu->ent;
arg.end = arg.ent.ul;
arg.end += rdu->sz;
err = -ENOTDIR;
if (unlikely(!file->f_op->iterate && !file->f_op->iterate_shared))
goto out;
err = security_file_permission(file, MAY_READ);
AuTraceErr(err);
if (unlikely(err))
goto out;
dentry = file->f_path.dentry;
inode = d_inode(dentry);
inode_lock_shared(inode);
arg.sb = inode->i_sb;
err = si_read_lock(arg.sb, AuLock_FLUSH | AuLock_NOPLM);
if (unlikely(err))
goto out_mtx;
err = au_alive_dir(dentry);
if (unlikely(err))
goto out_si;
/* todo: reval? */
fi_read_lock(file);
/* a continued call is valid only while the generation is unchanged */
err = -EAGAIN;
if (unlikely(au_ftest_rdu(cookie->flags, CONT)
&& cookie->generation != au_figen(file)))
goto out_unlock;
err = 0;
/* when the caller left ->blk zero, choose a value here */
if (!rdu->blk) {
rdu->blk = au_sbi(arg.sb)->si_rdblk;
if (!rdu->blk)
rdu->blk = au_dir_size(file, /*dentry*/NULL);
}
/* 'bbot' holds the top index of the file here, the bottom one below */
bbot = au_fbtop(file);
if (cookie->bindex < bbot)
cookie->bindex = bbot;
bbot = au_fbbot_dir(file);
/* AuDbg("b%d, b%d\n", cookie->bindex, bbot); */
/* visit the branches; stop on an error or when the user buffer is full */
for (; !err && cookie->bindex <= bbot;
cookie->bindex++, cookie->h_pos = 0) {
h_file = au_hf_dir(file, cookie->bindex);
if (!h_file)
continue;
au_fclr_rdu(cookie->flags, FULL);
err = au_rdu_do(h_file, &arg);
AuTraceErr(err);
if (unlikely(au_ftest_rdu(cookie->flags, FULL) || err))
break;
}
AuDbg("rent %llu\n", rdu->rent);
/* the first call without error: mark the cookie as continued */
if (!err && !au_ftest_rdu(cookie->flags, CONT)) {
rdu->shwh = !!au_opt_test(au_sbi(arg.sb)->si_mntflags, SHWH);
au_fset_rdu(cookie->flags, CONT);
cookie->generation = au_figen(file);
}
ii_read_lock_child(inode);
fsstack_copy_attr_atime(inode, au_h_iptr(inode, au_ibtop(inode)));
ii_read_unlock(inode);
out_unlock:
fi_read_unlock(file);
out_si:
si_read_unlock(arg.sb);
out_mtx:
inode_unlock_shared(inode);
out:
AuTraceErr(err);
return err;
}
/*
 * Serve AUFS_CTL_RDU_INO: for each of the rdu->nent entries in the user
 * buffer starting at rdu->ent, read its header, get an inode number by
 * au_ino() (or au_wh_ino() when ->wh is set in the header), and store the
 * number into ->ino of the entry in userspace.
 * Returns 0, -EFAULT when the user memory is not accessible, or the error
 * from au_ino()/au_wh_ino().
 */
static int au_rdu_ino(struct file *file, struct aufs_rdu *rdu)
{
int err;
ino_t ino;
unsigned long long nent;
union au_rdu_ent_ul *u;
struct au_rdu_ent ent;
struct super_block *sb;
err = 0;
nent = rdu->nent;
u = &rdu->ent;
sb = file->f_path.dentry->d_sb;
si_read_lock(sb, AuLock_FLUSH);
while (nent-- > 0) {
/* unnecessary to support mmap_sem since this is a dir */
err = copy_from_user(&ent, u->e, sizeof(ent));
if (!err)
/* VERIFY_WRITE */
err = !access_ok(&u->e->ino, sizeof(ino));
if (unlikely(err)) {
err = -EFAULT;
AuTraceErr(err);
break;
}
/* AuDbg("b%d, i%llu\n", ent.bindex, ent.ino); */
if (!ent.wh)
err = au_ino(sb, ent.bindex, ent.ino, ent.type, &ino);
else
err = au_wh_ino(sb, ent.bindex, ent.ino, ent.type,
&ino);
if (unlikely(err)) {
AuTraceErr(err);
break;
}
err = __put_user(ino, &u->e->ino);
if (unlikely(err)) {
err = -EFAULT;
AuTraceErr(err);
break;
}
/* go to the next entry; its length depends on the name length */
u->ul += au_rdu_len(ent.nlen);
}
si_read_unlock(sb);
return err;
}
/* ---------------------------------------------------------------------- */
/*
 * Verify the request from userspace: the size stored in
 * rdu->verify[AufsCtlRduV_SZ] has to equal the size of struct aufs_rdu.
 * Returns 0 or -EINVAL.
 */
static int au_rdu_verify(struct aufs_rdu *rdu)
{
	AuDbg("rdu{%llu, %p, %u | %u | %llu, %u, %u | "
	      "%llu, b%d, 0x%x, g%u}\n",
	      rdu->sz, rdu->ent.e, rdu->verify[AufsCtlRduV_SZ],
	      rdu->blk,
	      rdu->rent, rdu->shwh, rdu->full,
	      rdu->cookie.h_pos, rdu->cookie.bindex, rdu->cookie.flags,
	      rdu->cookie.generation);

	if (rdu->verify[AufsCtlRduV_SZ] != sizeof(*rdu)) {
		AuDbg("%u:%u\n",
		      rdu->verify[AufsCtlRduV_SZ], (unsigned int)sizeof(*rdu));
		return -EINVAL;
	}

	return 0;
}
long au_rdu_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
long err, e;
struct aufs_rdu rdu;
void __user *p = (void __user *)arg;
err = copy_from_user(&rdu, p, sizeof(rdu));
if (unlikely(err)) {
err = -EFAULT;
AuTraceErr(err);
goto out;
}
err = au_rdu_verify(&rdu);
if (unlikely(err))
goto out;
switch (cmd) {
case AUFS_CTL_RDU:
err = au_rdu(file, &rdu);
if (unlikely(err))
break;
e = copy_to_user(p, &rdu, sizeof(rdu));
if (unlikely(e)) {
err = -EFAULT;
AuTraceErr(err);
}
break;
case AUFS_CTL_RDU_INO:
err = au_rdu_ino(file, &rdu);
break;
default:
/* err = -ENOTTY; */
err = -EINVAL;
}
out:
AuTraceErr(err);
return err;
}
#ifdef CONFIG_COMPAT
/*
 * The compat variant of au_rdu_ioctl().
 * It differs in the handling of the user pointers only: @arg and rdu.ent are
 * converted by compat_ptr() before use, and rdu.ent/rdu.tail are converted
 * back by ptr_to_compat() before the struct is copied to userspace.
 */
long au_rdu_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
long err, e;
struct aufs_rdu rdu;
void __user *p = compat_ptr(arg);
/* todo: get_user()? */
err = copy_from_user(&rdu, p, sizeof(rdu));
if (unlikely(err)) {
err = -EFAULT;
AuTraceErr(err);
goto out;
}
rdu.ent.e = compat_ptr(rdu.ent.ul);
err = au_rdu_verify(&rdu);
if (unlikely(err))
goto out;
switch (cmd) {
case AUFS_CTL_RDU:
err = au_rdu(file, &rdu);
if (unlikely(err))
break;
rdu.ent.ul = ptr_to_compat(rdu.ent.e);
rdu.tail.ul = ptr_to_compat(rdu.tail.e);
e = copy_to_user(p, &rdu, sizeof(rdu));
if (unlikely(e)) {
err = -EFAULT;
AuTraceErr(err);
}
break;
case AUFS_CTL_RDU_INO:
err = au_rdu_ino(file, &rdu);
break;
default:
/* err = -ENOTTY; */
err = -EINVAL;
}
out:
AuTraceErr(err);
return err;
}
#endif

73
fs/aufs/rwsem.h Normal file
View File

@ -0,0 +1,73 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2005-2020 Junjiro R. Okajima
*
* This program, aufs is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* simple read-write semaphore wrappers
*/
#ifndef __AUFS_RWSEM_H__
#define __AUFS_RWSEM_H__
#ifdef __KERNEL__
#include "debug.h"
/* in the future, the name 'au_rwsem' will be totally gone */
#define au_rwsem rw_semaphore
/* to debug easier, do not make them inlined functions */
#define AuRwMustNoWaiters(rw) AuDebugOn(rwsem_is_contended(rw))
/* rwsem_is_locked() is unusable */
/*
 * The assertions below rely on lockdep; they are skipped while lockdep is
 * recursing or after debug_locks was cleared.
 */
#define AuRwMustReadLock(rw) AuDebugOn(!lockdep_recursing(current) \
&& debug_locks \
&& !lockdep_is_held_type(rw, 1))
#define AuRwMustWriteLock(rw) AuDebugOn(!lockdep_recursing(current) \
&& debug_locks \
&& !lockdep_is_held_type(rw, 0))
#define AuRwMustAnyLock(rw) AuDebugOn(!lockdep_recursing(current) \
&& debug_locks \
&& !lockdep_is_held(rw))
/* the rwsem must not be held when it is destroyed */
#define AuRwDestroy(rw) AuDebugOn(!lockdep_recursing(current) \
&& debug_locks \
&& lockdep_is_held(rw))
/* initialize the rwsem, optionally acquiring the write lock at once */
#define au_rw_init(rw) init_rwsem(rw)
#define au_rw_init_wlock(rw) do { \
au_rw_init(rw); \
down_write(rw); \
} while (0)
#define au_rw_init_wlock_nested(rw, lsc) do { \
au_rw_init(rw); \
down_write_nested(rw, lsc); \
} while (0)
/* plain aliases of the rw_semaphore operations */
#define au_rw_read_lock(rw) down_read(rw)
#define au_rw_read_lock_nested(rw, lsc) down_read_nested(rw, lsc)
#define au_rw_read_unlock(rw) up_read(rw)
#define au_rw_dgrade_lock(rw) downgrade_write(rw)
#define au_rw_write_lock(rw) down_write(rw)
#define au_rw_write_lock_nested(rw, lsc) down_write_nested(rw, lsc)
#define au_rw_write_unlock(rw) up_write(rw)
/* why is the _nested version not defined? */
#define au_rw_read_trylock(rw) down_read_trylock(rw)
#define au_rw_write_trylock(rw) down_write_trylock(rw)
#endif /* __KERNEL__ */
#endif /* __AUFS_RWSEM_H__ */

314
fs/aufs/sbinfo.c Normal file
View File

@ -0,0 +1,314 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2005-2020 Junjiro R. Okajima
*
* This program, aufs is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* superblock private data
*/
#include <linux/iversion.h>
#include "aufs.h"
/*
* they are necessary even when sysfs is disabled.
*/
/*
 * Free the au_sbinfo which embeds @kobj.
 * The plink lists, the nowait tasks and the inode/file counters are expected
 * to be empty/zero here (checked by the debug assertions), then the branches,
 * the branch array and sbinfo itself are freed.
 */
void au_si_free(struct kobject *kobj)
{
int i;
struct au_sbinfo *sbinfo;
char *locked __maybe_unused; /* debug only */
sbinfo = container_of(kobj, struct au_sbinfo, si_kobj);
for (i = 0; i < AuPlink_NHASH; i++)
AuDebugOn(!hlist_bl_empty(sbinfo->si_plink + i));
AuDebugOn(atomic_read(&sbinfo->si_nowait.nw_len));
AuLCntZero(au_lcnt_read(&sbinfo->si_ninodes, /*do_rev*/0));
au_lcnt_fin(&sbinfo->si_ninodes, /*do_sync*/0);
AuLCntZero(au_lcnt_read(&sbinfo->si_nfiles, /*do_rev*/0));
au_lcnt_fin(&sbinfo->si_nfiles, /*do_sync*/0);
dbgaufs_si_fin(sbinfo);
/* the branches are freed under the write lock of sbinfo */
au_rw_write_lock(&sbinfo->si_rwsem);
au_br_free(sbinfo);
au_rw_write_unlock(&sbinfo->si_rwsem);
au_kfree_try_rcu(sbinfo->si_branch);
mutex_destroy(&sbinfo->si_xib_mtx);
AuRwDestroy(&sbinfo->si_rwsem);
au_lcnt_wait_for_fin(&sbinfo->si_ninodes);
/* si_nfiles is waited too */
au_kfree_rcu(sbinfo);
}
/*
 * Allocate and initialize the au_sbinfo for @sb and attach it to
 * sb->s_fs_info.  On success si_rwsem is left write-locked (see
 * au_rw_init_wlock()).
 * Returns 0, -ENOMEM, or the error from sysaufs_si_init()/dbgaufs_si_init().
 *
 * NOTE(review): when dbgaufs_si_init() fails, kobject_put() is followed by
 * the frees under out_br/out_sbinfo.  If the release callback of si_kobj is
 * au_si_free() (not visible here), sbinfo and si_branch would be freed twice
 * -- confirm against sysaufs_si_init().
 */
int au_si_alloc(struct super_block *sb)
{
int err, i;
struct au_sbinfo *sbinfo;
err = -ENOMEM;
sbinfo = kzalloc(sizeof(*sbinfo), GFP_NOFS);
if (unlikely(!sbinfo))
goto out;
/* will be reallocated separately */
sbinfo->si_branch = kzalloc(sizeof(*sbinfo->si_branch), GFP_NOFS);
if (unlikely(!sbinfo->si_branch))
goto out_sbinfo;
err = sysaufs_si_init(sbinfo);
if (!err) {
dbgaufs_si_null(sbinfo);
err = dbgaufs_si_init(sbinfo);
if (unlikely(err))
kobject_put(&sbinfo->si_kobj);
}
if (unlikely(err))
goto out_br;
au_nwt_init(&sbinfo->si_nowait);
au_rw_init_wlock(&sbinfo->si_rwsem);
au_lcnt_init(&sbinfo->si_ninodes, /*release*/NULL);
au_lcnt_init(&sbinfo->si_nfiles, /*release*/NULL);
/* no branch yet */
sbinfo->si_bbot = -1;
sbinfo->si_last_br_id = AUFS_BRANCH_MAX / 2;
/* the default policies to select a writable branch */
sbinfo->si_wbr_copyup = AuWbrCopyup_Def;
sbinfo->si_wbr_create = AuWbrCreate_Def;
sbinfo->si_wbr_copyup_ops = au_wbr_copyup_ops + sbinfo->si_wbr_copyup;
sbinfo->si_wbr_create_ops = au_wbr_create_ops + sbinfo->si_wbr_create;
au_fhsm_init(sbinfo);
sbinfo->si_mntflags = au_opts_plink(AuOpt_Def);
sbinfo->si_xino_jiffy = jiffies;
sbinfo->si_xino_expire
= msecs_to_jiffies(AUFS_XINO_DEF_SEC * MSEC_PER_SEC);
mutex_init(&sbinfo->si_xib_mtx);
/* leave si_xib_last_pindex and si_xib_next_bit */
INIT_HLIST_BL_HEAD(&sbinfo->si_aopen);
/* the defaults for readdir */
sbinfo->si_rdcache = msecs_to_jiffies(AUFS_RDCACHE_DEF * MSEC_PER_SEC);
sbinfo->si_rdblk = AUFS_RDBLK_DEF;
sbinfo->si_rdhash = AUFS_RDHASH_DEF;
sbinfo->si_dirwh = AUFS_DIRWH_DEF;
for (i = 0; i < AuPlink_NHASH; i++)
INIT_HLIST_BL_HEAD(sbinfo->si_plink + i);
init_waitqueue_head(&sbinfo->si_plink_wq);
spin_lock_init(&sbinfo->si_plink_maint_lock);
INIT_HLIST_BL_HEAD(&sbinfo->si_files);
/* with getattr by default */
sbinfo->si_iop_array = aufs_iop;
/* leave other members for sysaufs and si_mnt. */
sbinfo->si_sb = sb;
sb->s_fs_info = sbinfo;
si_pid_set(sb);
return 0; /* success */
out_br:
au_kfree_try_rcu(sbinfo->si_branch);
out_sbinfo:
au_kfree_rcu(sbinfo);
out:
return err;
}
/*
 * Resize the branch array of @sbinfo to hold @nbr pointers.
 * The current size is computed from si_bbot (at least one pointer).
 * @may_shrink is handed to au_kzrealloc() as is.
 * The caller has to hold the write lock of sbinfo.
 * Returns 0 or -ENOMEM.
 */
int au_sbr_realloc(struct au_sbinfo *sbinfo, int nbr, int may_shrink)
{
	int cur_sz;
	struct au_branch **brp;

	AuRwMustWriteLock(&sbinfo->si_rwsem);

	cur_sz = sizeof(*brp) * (sbinfo->si_bbot + 1);
	if (unlikely(!cur_sz))
		cur_sz = sizeof(*brp);
	brp = au_kzrealloc(sbinfo->si_branch, cur_sz, sizeof(*brp) * nbr,
			   GFP_NOFS, may_shrink);
	if (!brp)
		return -ENOMEM;

	sbinfo->si_branch = brp;
	return 0;
}
/* ---------------------------------------------------------------------- */
/*
 * Increment the generation of @sb, update the generation of the root dentry
 * and the root inode, and increment i_version of the root inode.
 * The caller has to hold the write lock of sbinfo.
 * Returns the new generation.
 */
unsigned int au_sigen_inc(struct super_block *sb)
{
	unsigned int new_gen;
	struct inode *root_inode;

	SiMustWriteLock(sb);

	new_gen = ++au_sbi(sb)->si_generation;
	au_update_digen(sb->s_root);
	root_inode = d_inode(sb->s_root);
	au_update_iigen(root_inode, /*half*/0);
	inode_inc_iversion(root_inode);

	return new_gen;
}
/*
 * Find a new branch id: si_last_br_id is incremented until a non-zero value
 * which no existing branch has (au_br_index() is negative) is found.
 * At most AUFS_BRANCH_MAX + 1 values are tried.
 * The caller has to hold the write lock of sbinfo.
 * Returns the id, or -1 when nothing is found.
 */
aufs_bindex_t au_new_br_id(struct super_block *sb)
{
	aufs_bindex_t br_id;
	int tries;
	struct au_sbinfo *sbinfo;

	SiMustWriteLock(sb);

	sbinfo = au_sbi(sb);
	for (tries = AUFS_BRANCH_MAX + 1; tries > 0; tries--) {
		br_id = ++sbinfo->si_last_br_id;
		AuDebugOn(br_id < 0);
		if (!br_id || au_br_index(sb, br_id) >= 0)
			continue;
		return br_id;
	}

	return -1;
}
/* ---------------------------------------------------------------------- */
/*
 * Acquire the read lock of sbinfo.
 * With AuLock_FLUSH in @flags the nowait tasks are flushed first;
 * it is ok that new 'nwt' tasks are appended while we are sleeping.
 * Returns 0, or the error from au_plink_maint() in which case the lock is
 * released again.
 */
int si_read_lock(struct super_block *sb, int flags)
{
	int err;

	if (au_ftest_lock(flags, FLUSH))
		au_nwt_flush(&au_sbi(sb)->si_nowait);

	si_noflush_read_lock(sb);
	err = au_plink_maint(sb, flags);
	if (!err)
		return 0;

	si_read_unlock(sb);
	return err;
}
/*
 * Acquire the write lock of sbinfo.
 * With AuLock_FLUSH in @flags the nowait tasks are flushed first.
 * Returns 0, or the error from au_plink_maint() in which case the lock is
 * released again.
 */
int si_write_lock(struct super_block *sb, int flags)
{
	int err;

	if (au_ftest_lock(flags, FLUSH))
		au_nwt_flush(&au_sbi(sb)->si_nowait);

	si_noflush_write_lock(sb);
	err = au_plink_maint(sb, flags);
	if (!err)
		return 0;

	si_write_unlock(sb);
	return err;
}
/*
 * dentry and super_block lock. call at entry point
 *
 * Acquire the read lock of sbinfo and then the lock of @dentry (the write
 * lock with AuLock_DW in @flags, otherwise the read lock).
 * With AuLock_GEN the generation of @dentry is tested too.
 * Returns 0 or an error; nothing is left locked when an error is returned.
 */
int aufs_read_lock(struct dentry *dentry, int flags)
{
	int err;
	struct super_block *sb = dentry->d_sb;

	err = si_read_lock(sb, flags);
	if (unlikely(err))
		return err;

	if (au_ftest_lock(flags, DW))
		di_write_lock_child(dentry);
	else
		di_read_lock_child(dentry, flags);

	if (!au_ftest_lock(flags, GEN))
		return 0;

	err = au_digen_test(dentry, au_sigen(sb));
	if (!au_opt_test(au_mntflags(sb), UDBA_NONE))
		AuDebugOn(!err && au_dbrange_test(dentry));
	else if (!err)
		err = au_dbrange_test(dentry);
	if (unlikely(err))
		aufs_read_unlock(dentry, flags);

	return err;
}
/*
 * The counterpart of aufs_read_lock(): release the lock of @dentry (the
 * write lock with AuLock_DW in @flags, otherwise the read lock) and then the
 * read lock of sbinfo.
 */
void aufs_read_unlock(struct dentry *dentry, int flags)
{
if (au_ftest_lock(flags, DW))
di_write_unlock(dentry);
else
di_read_unlock(dentry, flags);
si_read_unlock(dentry->d_sb);
}
/*
 * Acquire the write lock of sbinfo (with AuLock_FLUSH | AuLock_NOPLMW) and
 * then the write lock of @dentry.
 * The value returned by si_write_lock() is not checked here.
 */
void aufs_write_lock(struct dentry *dentry)
{
si_write_lock(dentry->d_sb, AuLock_FLUSH | AuLock_NOPLMW);
di_write_lock_child(dentry);
}
/*
 * The counterpart of aufs_write_lock(): release the write lock of @dentry
 * and then the write lock of sbinfo.
 */
void aufs_write_unlock(struct dentry *dentry)
{
di_write_unlock(dentry);
si_write_unlock(dentry->d_sb);
}
/*
 * Acquire the read lock of sbinfo and then the write locks of both @d1 and
 * @d2.  With AuLock_GEN in @flags the generations of both dentries are
 * tested too.
 * Returns 0 or an error; nothing is left locked when an error is returned.
 */
int aufs_read_and_write_lock2(struct dentry *d1, struct dentry *d2, int flags)
{
	int err;
	unsigned int sigen;
	struct super_block *sb = d1->d_sb;

	err = si_read_lock(sb, flags);
	if (unlikely(err))
		return err;

	di_write_lock2_child(d1, d2, au_ftest_lock(flags, DIRS));
	if (!au_ftest_lock(flags, GEN))
		return 0;

	sigen = au_sigen(sb);
	err = au_digen_test(d1, sigen);
	AuDebugOn(!err && au_dbrange_test(d1));
	if (!err) {
		err = au_digen_test(d2, sigen);
		AuDebugOn(!err && au_dbrange_test(d2));
	}
	if (unlikely(err))
		aufs_read_and_write_unlock2(d1, d2);

	return err;
}
/*
 * The counterpart of aufs_read_and_write_lock2(): release the write locks of
 * @d1 and @d2 and then the read lock of sbinfo.
 */
void aufs_read_and_write_unlock2(struct dentry *d1, struct dentry *d2)
{
di_write_unlock2(d1, d2);
si_read_unlock(d1->d_sb);
}

1047
fs/aufs/super.c Normal file

File diff suppressed because it is too large Load Diff

589
fs/aufs/super.h Normal file
View File

@ -0,0 +1,589 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2005-2020 Junjiro R. Okajima
*
* This program, aufs is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* super_block operations
*/
#ifndef __AUFS_SUPER_H__
#define __AUFS_SUPER_H__
#ifdef __KERNEL__
#include <linux/fs.h>
#include <linux/kobject.h>
#include "hbl.h"
#include "lcnt.h"
#include "rwsem.h"
#include "wkq.h"
/* policies to select one among multiple writable branches */
/* the operation of a copy-up policy */
struct au_wbr_copyup_operations {
int (*copyup)(struct dentry *dentry);
};
/* flags handed to the ->create() of a writable branch policy */
#define AuWbr_DIR	1		/* target is a dir */
#define AuWbr_PARENT	(1 << 1)	/* always require a parent */

/*
 * test, set and clear the bit AuWbr_<name> in flags.
 * the setter and the clearer are wrapped by do { } while (0) in order to
 * behave as a single statement, e.g. in an un-braced if/else.
 */
#define au_ftest_wbr(flags, name)	((flags) & AuWbr_##name)
#define au_fset_wbr(flags, name) \
	do { (flags) |= AuWbr_##name; } while (0)
#define au_fclr_wbr(flags, name) \
	do { (flags) &= ~AuWbr_##name; } while (0)
/*
 * Callback table of a policy used when creating.
 * ->create() takes the target dentry and a flag word (NOTE(review):
 * presumably the AuWbr_ bits -- confirm); ->init() and ->fin() take the
 * super_block.  All three return an int.
 */
struct au_wbr_create_operations {
int (*create)(struct dentry *dentry, unsigned int flags);
int (*init)(struct super_block *sb);
int (*fin)(struct super_block *sb);
};
/*
 * State of the "most free space" policy; embedded in struct au_sbinfo as
 * si_wbr_mfs.
 */
struct au_wbr_mfs {
struct mutex mfs_lock; /* protect this structure */
/* NOTE(review): looks like a jiffies timestamp and a lifetime -- confirm */
unsigned long mfs_jiffy;
unsigned long mfs_expire;
aufs_bindex_t mfs_bindex;
unsigned long long mfsrr_bytes;
unsigned long long mfsrr_watermark;
};
/* number of buckets; also the length of au_sbinfo.si_plink[] */
#define AuPlink_NHASH 100

/* map an inode number to a bucket index in [0, AuPlink_NHASH) */
static inline int au_plink_hash(ino_t ino)
{
	ino_t bucket = ino % AuPlink_NHASH;

	return bucket;
}
/* File-based Hierarchical Storage Management */
/*
 * Per-superblock FHSM state; embedded in struct au_sbinfo as si_fhsm.
 * The structure has no members unless CONFIG_AUFS_FHSM is enabled.
 */
struct au_fhsm {
#ifdef CONFIG_AUFS_FHSM
/* allow only one process who can receive the notification */
/* fhsm_pid is read under fhsm_spin, see au_fhsm_pid() */
spinlock_t fhsm_spin;
pid_t fhsm_pid;
wait_queue_head_t fhsm_wqh;
atomic_t fhsm_readable;
/* these are protected by si_rwsem */
unsigned long fhsm_expire;
aufs_bindex_t fhsm_bottom;
#endif
};
struct au_branch;
/*
 * aufs private data of a super_block; au_sbi() returns it from
 * sb->s_fs_info.  Several accessors below (au_sbbot(), au_mntflags(),
 * au_sigen(), au_sbr(), au_xi_maxent()) assert that si_rwsem is held
 * before reading their member.
 */
struct au_sbinfo {
/* nowait tasks in the system-wide workqueue */
struct au_nowait_tasks si_nowait;
/*
* tried sb->s_umount, but failed due to the dependency between i_mutex.
* rwsem for au_sbinfo is necessary.
*/
struct au_rwsem si_rwsem;
/*
* dirty approach to protect sb->sb_inodes and ->s_files (gone) from
* remount.
*/
au_lcnt_t si_ninodes, si_nfiles;
/* branch management */
/* returned by au_sigen() */
unsigned int si_generation;
/* see AuSi_ flags */
unsigned char au_si_status;
/* returned by au_sbbot(); used as the last valid branch index in loops */
aufs_bindex_t si_bbot;
/* dirty trick to keep br_id plus */
unsigned int si_last_br_id :
sizeof(aufs_bindex_t) * BITS_PER_BYTE - 1;
/* array of branch pointers, indexed by bindex (see au_sbr()) */
struct au_branch **si_branch;
/* policy to select a writable branch */
unsigned char si_wbr_copyup;
unsigned char si_wbr_create;
/* callback tables invoked through AuWbrCopyup() and AuWbrCreate() */
struct au_wbr_copyup_operations *si_wbr_copyup_ops;
struct au_wbr_create_operations *si_wbr_create_ops;
/* round robin */
atomic_t si_wbr_rr_next;
/* most free space */
struct au_wbr_mfs si_wbr_mfs;
/* File-based Hierarchical Storage Management */
struct au_fhsm si_fhsm;
/* mount flags */
/* include/asm-ia64/siginfo.h defines a macro named si_flags */
unsigned int si_mntflags;
/* external inode number (bitmap and translation table) */
vfs_readf_t si_xread;
vfs_writef_t si_xwrite;
loff_t si_ximaxent; /* max entries in a xino */
struct file *si_xib;
struct mutex si_xib_mtx; /* protect xib members */
unsigned long *si_xib_buf;
unsigned long si_xib_last_pindex;
int si_xib_next_bit;
unsigned long si_xino_jiffy;
unsigned long si_xino_expire;
/* reserved for future use */
/* unsigned long long si_xib_limit; */ /* Max xib file size */
#ifdef CONFIG_AUFS_EXPORT
/* i_generation */
/* todo: make xigen file an array to support many inode numbers */
struct file *si_xigen;
atomic_t si_xigen_next;
#endif
/* dirty trick to support atomic_open */
struct hlist_bl_head si_aopen;
/* vdir parameters */
unsigned long si_rdcache; /* max cache time in jiffies */
unsigned int si_rdblk; /* deblk size */
unsigned int si_rdhash; /* hash size */
/*
* If the number of whiteouts is larger than si_dirwh, leave all of
* them after au_whtmp_ren to reduce the cost of rmdir(2).
* A future fsck.aufs or kernel thread will remove them later.
* Otherwise, remove all whiteouts and the dir in rmdir(2).
*/
unsigned int si_dirwh;
/* pseudo_link list */
struct hlist_bl_head si_plink[AuPlink_NHASH];
wait_queue_head_t si_plink_wq;
spinlock_t si_plink_maint_lock;
pid_t si_plink_maint_pid;
/* file list */
struct hlist_bl_head si_files;
/* with/without getattr, brother of sb->s_d_op */
const struct inode_operations *si_iop_array;
/*
* sysfs and lifetime management.
* this is not a small structure and it may be a waste of memory in case
* sysfs is disabled, particularly when many aufs-es are mounted.
* but using sysfs is the majority case.
*/
struct kobject si_kobj;
/* debugfs entries; all are reset to NULL by dbgaufs_si_null() */
#ifdef CONFIG_DEBUG_FS
struct dentry *si_dbgaufs;
struct dentry *si_dbgaufs_plink;
struct dentry *si_dbgaufs_xib;
#ifdef CONFIG_AUFS_EXPORT
struct dentry *si_dbgaufs_xigen;
#endif
#endif
/* node on the global au_sbilist, see au_sbilist_add()/au_sbilist_del() */
#ifdef CONFIG_AUFS_SBILIST
struct hlist_bl_node si_list;
#endif
/* dirty, necessary for unmounting, sysfs and sysrq */
struct super_block *si_sb;
};
/* sbinfo status flags */
/*
* set true when refresh_dirs() failed at remount time.
* then try refreshing dirs at access time again.
* if it is false, refreshing dirs at access time is unnecessary
*/
#define AuSi_FAILED_REFRESH_DIR 1
#define AuSi_FHSM (1 << 1) /* fhsm is active now */
#define AuSi_NO_DREVAL (1 << 2) /* disable all d_revalidate */
/*
 * Without CONFIG_AUFS_FHSM the flag becomes 0, so testing it always yields 0
 * and setting or clearing it leaves au_si_status unchanged.
 */
#ifndef CONFIG_AUFS_FHSM
#undef AuSi_FHSM
#define AuSi_FHSM 0
#endif
/*
 * Return the bits of @flag which are set in sbi->au_si_status.
 * si_rwsem is asserted via AuRwMustAnyLock() before the status is read.
 */
static inline unsigned char au_do_ftest_si(struct au_sbinfo *sbi,
					   unsigned int flag)
{
	unsigned char status;

	AuRwMustAnyLock(&sbi->si_rwsem);
	status = sbi->au_si_status;

	return status & flag;
}
/*
 * Test, set and clear an AuSi_<name> bit in sbinfo->au_si_status.
 * The test goes through au_do_ftest_si(); set and clear assert
 * AuRwMustWriteLock() on si_rwsem before modifying the status.
 */
#define au_ftest_si(sbinfo, name) au_do_ftest_si(sbinfo, AuSi_##name)
#define au_fset_si(sbinfo, name) do { \
AuRwMustWriteLock(&(sbinfo)->si_rwsem); \
(sbinfo)->au_si_status |= AuSi_##name; \
} while (0)
#define au_fclr_si(sbinfo, name) do { \
AuRwMustWriteLock(&(sbinfo)->si_rwsem); \
(sbinfo)->au_si_status &= ~AuSi_##name; \
} while (0)
/* ---------------------------------------------------------------------- */
/* policy to select one among writable branches */
/*
 * Invoke the ->copyup() / ->create() callback currently installed in
 * @sbinfo, forwarding the remaining arguments unchanged.
 */
#define AuWbrCopyup(sbinfo, ...) \
((sbinfo)->si_wbr_copyup_ops->copyup(__VA_ARGS__))
#define AuWbrCreate(sbinfo, ...) \
((sbinfo)->si_wbr_create_ops->create(__VA_ARGS__))
/* flags for si_read_lock()/aufs_read_lock()/di_read_lock() */
#define AuLock_DW 1 /* write-lock dentry */
#define AuLock_IR (1 << 1) /* read-lock inode */
#define AuLock_IW (1 << 2) /* write-lock inode */
#define AuLock_FLUSH (1 << 3) /* wait for 'nowait' tasks */
#define AuLock_DIRS (1 << 4) /* target is a pair of dirs */
/* except RENAME_EXCHANGE */
#define AuLock_NOPLM (1 << 5) /* return err in plm mode */
#define AuLock_NOPLMW (1 << 6) /* wait for plm mode ends */
#define AuLock_GEN (1 << 7) /* test digen/iigen */
/* test, set and clear an AuLock_<name> bit in a caller-owned flag word */
#define au_ftest_lock(flags, name) ((flags) & AuLock_##name)
#define au_fset_lock(flags, name) \
do { (flags) |= AuLock_##name; } while (0)
#define au_fclr_lock(flags, name) \
do { (flags) &= ~AuLock_##name; } while (0)
/* ---------------------------------------------------------------------- */
/* super.c */
/*
 * Declarations only; the definitions live in the source files named by the
 * section comments below.
 */
extern struct file_system_type aufs_fs_type;
struct inode *au_iget_locked(struct super_block *sb, ino_t ino);
/* callback type handed to au_array_alloc() */
typedef unsigned long long (*au_arraycb_t)(struct super_block *sb, void *array,
unsigned long long max, void *arg);
void *au_array_alloc(unsigned long long *hint, au_arraycb_t cb,
struct super_block *sb, void *arg);
struct inode **au_iarray_alloc(struct super_block *sb, unsigned long long *max);
void au_iarray_free(struct inode **a, unsigned long long max);
/* sbinfo.c */
/* au_si_free() is installed as the kobj_type ->release() in sysaufs.c */
void au_si_free(struct kobject *kobj);
int au_si_alloc(struct super_block *sb);
int au_sbr_realloc(struct au_sbinfo *sbinfo, int nbr, int may_shrink);
unsigned int au_sigen_inc(struct super_block *sb);
aufs_bindex_t au_new_br_id(struct super_block *sb);
/* the 'flags' arguments below take AuLock_ bits */
int si_read_lock(struct super_block *sb, int flags);
int si_write_lock(struct super_block *sb, int flags);
int aufs_read_lock(struct dentry *dentry, int flags);
void aufs_read_unlock(struct dentry *dentry, int flags);
void aufs_write_lock(struct dentry *dentry);
void aufs_write_unlock(struct dentry *dentry);
int aufs_read_and_write_lock2(struct dentry *d1, struct dentry *d2, int flags);
void aufs_read_and_write_unlock2(struct dentry *d1, struct dentry *d2);
/* wbr_policy.c */
extern struct au_wbr_copyup_operations au_wbr_copyup_ops[];
extern struct au_wbr_create_operations au_wbr_create_ops[];
int au_cpdown_dirs(struct dentry *dentry, aufs_bindex_t bdst);
int au_wbr_nonopq(struct dentry *dentry, aufs_bindex_t bindex);
int au_wbr_do_copyup_bu(struct dentry *dentry, aufs_bindex_t btop);
/* mvdown.c */
int au_mvdown(struct dentry *dentry, struct aufs_mvdown __user *arg);
#ifdef CONFIG_AUFS_FHSM
/* fhsm.c */
static inline pid_t au_fhsm_pid(struct au_fhsm *fhsm)
{
pid_t pid;
spin_lock(&fhsm->fhsm_spin);
pid = fhsm->fhsm_pid;
spin_unlock(&fhsm->fhsm_spin);
return pid;
}
/* with CONFIG_AUFS_FHSM: real functions, declared here */
void au_fhsm_wrote(struct super_block *sb, aufs_bindex_t bindex, int force);
void au_fhsm_wrote_all(struct super_block *sb, int force);
int au_fhsm_fd(struct super_block *sb, int oflags);
int au_fhsm_br_alloc(struct au_branch *br);
void au_fhsm_set_bottom(struct super_block *sb, aufs_bindex_t bindex);
void au_fhsm_fin(struct super_block *sb);
void au_fhsm_init(struct au_sbinfo *sbinfo);
void au_fhsm_set(struct au_sbinfo *sbinfo, unsigned int sec);
void au_fhsm_show(struct seq_file *seq, struct au_sbinfo *sbinfo);
#else
/*
 * without CONFIG_AUFS_FHSM: stubs generated by the AuStub*() macros with the
 * same names and parameters; au_fhsm_fd() is given 'return -EOPNOTSUPP' and
 * au_fhsm_pid() 'return 0' as their bodies.
 */
AuStubVoid(au_fhsm_wrote, struct super_block *sb, aufs_bindex_t bindex,
int force)
AuStubVoid(au_fhsm_wrote_all, struct super_block *sb, int force)
AuStub(int, au_fhsm_fd, return -EOPNOTSUPP, struct super_block *sb, int oflags)
AuStub(pid_t, au_fhsm_pid, return 0, struct au_fhsm *fhsm)
AuStubInt0(au_fhsm_br_alloc, struct au_branch *br)
AuStubVoid(au_fhsm_set_bottom, struct super_block *sb, aufs_bindex_t bindex)
AuStubVoid(au_fhsm_fin, struct super_block *sb)
AuStubVoid(au_fhsm_init, struct au_sbinfo *sbinfo)
AuStubVoid(au_fhsm_set, struct au_sbinfo *sbinfo, unsigned int sec)
AuStubVoid(au_fhsm_show, struct seq_file *seq, struct au_sbinfo *sbinfo)
#endif
/* ---------------------------------------------------------------------- */
/* the aufs private data is kept in sb->s_fs_info */
static inline struct au_sbinfo *au_sbi(struct super_block *sb)
{
	struct au_sbinfo *sbinfo = sb->s_fs_info;

	return sbinfo;
}
/* ---------------------------------------------------------------------- */
#ifdef CONFIG_AUFS_EXPORT
/* declarations for CONFIG_AUFS_EXPORT builds; stubs follow in the #else part */
int au_test_nfsd(void);
void au_export_init(struct super_block *sb);
void au_xigen_inc(struct inode *inode);
int au_xigen_new(struct inode *inode);
int au_xigen_set(struct super_block *sb, struct path *path);
void au_xigen_clr(struct super_block *sb);
/* -ESTALE when au_test_nfsd() is true, -EBUSY otherwise */
static inline int au_busy_or_stale(void)
{
	return au_test_nfsd() ? -ESTALE : -EBUSY;
}
#else
/*
 * without CONFIG_AUFS_EXPORT: stubs generated by the AuStub*() macros;
 * au_busy_or_stale() is given 'return -EBUSY' as its body.
 */
AuStubInt0(au_test_nfsd, void)
AuStubVoid(au_export_init, struct super_block *sb)
AuStubVoid(au_xigen_inc, struct inode *inode)
AuStubInt0(au_xigen_new, struct inode *inode)
AuStubInt0(au_xigen_set, struct super_block *sb, struct path *path)
AuStubVoid(au_xigen_clr, struct super_block *sb)
AuStub(int, au_busy_or_stale, return -EBUSY, void)
#endif /* CONFIG_AUFS_EXPORT */
/* ---------------------------------------------------------------------- */
#ifdef CONFIG_AUFS_SBILIST
/* module.c */
extern struct hlist_bl_head au_sbilist;
static inline void au_sbilist_init(void)
{
INIT_HLIST_BL_HEAD(&au_sbilist);
}
static inline void au_sbilist_add(struct super_block *sb)
{
au_hbl_add(&au_sbi(sb)->si_list, &au_sbilist);
}
static inline void au_sbilist_del(struct super_block *sb)
{
au_hbl_del(&au_sbi(sb)->si_list, &au_sbilist);
}
#ifdef CONFIG_AUFS_MAGIC_SYSRQ
/* take the bit lock of the global list head */
static inline void au_sbilist_lock(void)
{
	struct hlist_bl_head *head = &au_sbilist;

	hlist_bl_lock(head);
}

/* release the bit lock of the global list head */
static inline void au_sbilist_unlock(void)
{
	struct hlist_bl_head *head = &au_sbilist;

	hlist_bl_unlock(head);
}

/* allocation flags selected when CONFIG_AUFS_MAGIC_SYSRQ is enabled */
#define AuGFP_SBILIST GFP_ATOMIC
#else
/* without CONFIG_AUFS_MAGIC_SYSRQ: lock/unlock are stubs, GFP_NOFS is used */
AuStubVoid(au_sbilist_lock, void)
AuStubVoid(au_sbilist_unlock, void)
#define AuGFP_SBILIST GFP_NOFS
#endif /* CONFIG_AUFS_MAGIC_SYSRQ */
#else
/* without CONFIG_AUFS_SBILIST: every list helper is a stub */
AuStubVoid(au_sbilist_init, void)
AuStubVoid(au_sbilist_add, struct super_block *sb)
AuStubVoid(au_sbilist_del, struct super_block *sb)
AuStubVoid(au_sbilist_lock, void)
AuStubVoid(au_sbilist_unlock, void)
#define AuGFP_SBILIST GFP_NOFS
#endif
/* ---------------------------------------------------------------------- */
/*
 * Reset every debugfs dentry pointer of @sbinfo to NULL.
 * Does nothing unless CONFIG_DEBUG_FS is enabled.
 */
static inline void dbgaufs_si_null(struct au_sbinfo *sbinfo)
{
	/*
	 * This function is a dynamic '__init' function actually,
	 * so the tiny check for si_rwsem is unnecessary.
	 */
	/* AuRwMustWriteLock(&sbinfo->si_rwsem); */
#ifdef CONFIG_DEBUG_FS
#ifdef CONFIG_AUFS_EXPORT
	sbinfo->si_dbgaufs_xigen = NULL;
#endif
	sbinfo->si_dbgaufs_xib = NULL;
	sbinfo->si_dbgaufs_plink = NULL;
	sbinfo->si_dbgaufs = NULL;
#endif
}
/* ---------------------------------------------------------------------- */
/* current->atomic_flags */
/* this value must never collide with the ones defined in linux/sched.h */
/*
 * PFA_AUFS is handed to the TASK_PFA_*() generator macros, which produce the
 * three helpers named in the trailing comments.
 */
#define PFA_AUFS 0x10
TASK_PFA_TEST(AUFS, test_aufs) /* task_test_aufs */
TASK_PFA_SET(AUFS, aufs) /* task_set_aufs */
TASK_PFA_CLEAR(AUFS, aufs) /* task_clear_aufs */
/*
 * The helpers below operate on the aufs flag of the current task;
 * the @sb argument is not used by any of them.
 */

/* 1 when the current task carries the aufs flag, 0 otherwise */
static inline int si_pid_test(struct super_block *sb)
{
	return task_test_aufs(current) ? 1 : 0;
}

/* clear the flag; the flag is expected to be set (AuDebugOn otherwise) */
static inline void si_pid_clr(struct super_block *sb)
{
	struct task_struct *tsk = current;

	AuDebugOn(!task_test_aufs(tsk));
	task_clear_aufs(tsk);
}

/* set the flag; the flag is expected to be clear (AuDebugOn otherwise) */
static inline void si_pid_set(struct super_block *sb)
{
	struct task_struct *tsk = current;

	AuDebugOn(task_test_aufs(tsk));
	task_set_aufs(tsk);
}
/* ---------------------------------------------------------------------- */
/* lock superblock. mainly for entry point functions */
/*
 * Thin wrappers which apply the au_rw_*() primitives to the si_rwsem of the
 * sbinfo attached to @sb.  They do not touch the per-task aufs flag; the
 * si_noflush_*() and si_*_unlock() inline functions add that.
 */
#define __si_read_lock(sb) au_rw_read_lock(&au_sbi(sb)->si_rwsem)
#define __si_write_lock(sb) au_rw_write_lock(&au_sbi(sb)->si_rwsem)
#define __si_read_trylock(sb) au_rw_read_trylock(&au_sbi(sb)->si_rwsem)
#define __si_write_trylock(sb) au_rw_write_trylock(&au_sbi(sb)->si_rwsem)
/*
#define __si_read_trylock_nested(sb) \
au_rw_read_trylock_nested(&au_sbi(sb)->si_rwsem)
#define __si_write_trylock_nested(sb) \
au_rw_write_trylock_nested(&au_sbi(sb)->si_rwsem)
*/
#define __si_read_unlock(sb) au_rw_read_unlock(&au_sbi(sb)->si_rwsem)
#define __si_write_unlock(sb) au_rw_write_unlock(&au_sbi(sb)->si_rwsem)
#define __si_downgrade_lock(sb) au_rw_dgrade_lock(&au_sbi(sb)->si_rwsem)
/* assertions on the state of si_rwsem */
#define SiMustNoWaiters(sb) AuRwMustNoWaiters(&au_sbi(sb)->si_rwsem)
#define SiMustAnyLock(sb) AuRwMustAnyLock(&au_sbi(sb)->si_rwsem)
#define SiMustWriteLock(sb) AuRwMustWriteLock(&au_sbi(sb)->si_rwsem)
static inline void si_noflush_read_lock(struct super_block *sb)
{
__si_read_lock(sb);
si_pid_set(sb);
}
static inline int si_noflush_read_trylock(struct super_block *sb)
{
int locked;
locked = __si_read_trylock(sb);
if (locked)
si_pid_set(sb);
return locked;
}
static inline void si_noflush_write_lock(struct super_block *sb)
{
__si_write_lock(sb);
si_pid_set(sb);
}
static inline int si_noflush_write_trylock(struct super_block *sb)
{
int locked;
locked = __si_write_trylock(sb);
if (locked)
si_pid_set(sb);
return locked;
}
#if 0 /* reserved */
static inline int si_read_trylock(struct super_block *sb, int flags)
{
if (au_ftest_lock(flags, FLUSH))
au_nwt_flush(&au_sbi(sb)->si_nowait);
return si_noflush_read_trylock(sb);
}
#endif
static inline void si_read_unlock(struct super_block *sb)
{
si_pid_clr(sb);
__si_read_unlock(sb);
}
#if 0 /* reserved */
static inline int si_write_trylock(struct super_block *sb, int flags)
{
if (au_ftest_lock(flags, FLUSH))
au_nwt_flush(&au_sbi(sb)->si_nowait);
return si_noflush_write_trylock(sb);
}
#endif
static inline void si_write_unlock(struct super_block *sb)
{
si_pid_clr(sb);
__si_write_unlock(sb);
}
#if 0 /* reserved */
static inline void si_downgrade_lock(struct super_block *sb)
{
__si_downgrade_lock(sb);
}
#endif
/* ---------------------------------------------------------------------- */
static inline aufs_bindex_t au_sbbot(struct super_block *sb)
{
SiMustAnyLock(sb);
return au_sbi(sb)->si_bbot;
}
static inline unsigned int au_mntflags(struct super_block *sb)
{
SiMustAnyLock(sb);
return au_sbi(sb)->si_mntflags;
}
static inline unsigned int au_sigen(struct super_block *sb)
{
SiMustAnyLock(sb);
return au_sbi(sb)->si_generation;
}
static inline struct au_branch *au_sbr(struct super_block *sb,
aufs_bindex_t bindex)
{
SiMustAnyLock(sb);
return au_sbi(sb)->si_branch[0 + bindex];
}
static inline loff_t au_xi_maxent(struct super_block *sb)
{
SiMustAnyLock(sb);
return au_sbi(sb)->si_ximaxent;
}
#endif /* __KERNEL__ */
#endif /* __AUFS_SUPER_H__ */

93
fs/aufs/sysaufs.c Normal file
View File

@ -0,0 +1,93 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2005-2020 Junjiro R. Okajima
*
* This program, aufs is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* sysfs interface and lifetime management
* they are necessary regardless of whether sysfs is enabled.
*/
#include <linux/random.h>
#include "aufs.h"
/* random non-zero value set by sysaufs_init(); XOR-ed in sysaufs_si_id() */
unsigned long sysaufs_si_mask;
/* created by sysaufs_init(); every sbinfo kobject is assigned to it */
struct kset *sysaufs_kset;
/*
 * initializer for a read-only (0444) struct sysaufs_si_attr named <_name>
 * whose ->show is sysaufs_si_<_name>()
 */
#define AuSiAttr(_name) { \
.attr = { .name = __stringify(_name), .mode = 0444 }, \
.show = sysaufs_si_##_name, \
}
static struct sysaufs_si_attr sysaufs_si_attr_xi_path = AuSiAttr(xi_path);
/* NULL-terminated; walked by sysaufs_si_show() to find the handler by name */
struct attribute *sysaufs_si_attrs[] = {
&sysaufs_si_attr_xi_path.attr,
NULL,
};
static const struct sysfs_ops au_sbi_ops = {
.show = sysaufs_si_show
};
/* kobject type of au_sbinfo.si_kobj; ->release is au_si_free() */
static struct kobj_type au_sbi_ktype = {
.release = au_si_free,
.sysfs_ops = &au_sbi_ops,
.default_attrs = sysaufs_si_attrs
};
/* ---------------------------------------------------------------------- */
int sysaufs_si_init(struct au_sbinfo *sbinfo)
{
int err;
sbinfo->si_kobj.kset = sysaufs_kset;
/* cf. sysaufs_name() */
err = kobject_init_and_add
(&sbinfo->si_kobj, &au_sbi_ktype, /*&sysaufs_kset->kobj*/NULL,
SysaufsSiNamePrefix "%lx", sysaufs_si_id(sbinfo));
return err;
}
void sysaufs_fin(void)
{
sysfs_remove_group(&sysaufs_kset->kobj, sysaufs_attr_group);
kset_unregister(sysaufs_kset);
}
/*
 * Pick a non-zero random sysaufs_si_mask, create the kset under fs_kobj and
 * add the attribute group to it.
 * Returns 0 on success; -EINVAL when the kset pointer is NULL, PTR_ERR() when
 * it is an error pointer, or the error of sysfs_create_group() (the kset is
 * unregistered in that case).
 */
int __init sysaufs_init(void)
{
	int err;

	/* redraw until the mask is non-zero */
	for (;;) {
		get_random_bytes(&sysaufs_si_mask, sizeof(sysaufs_si_mask));
		if (sysaufs_si_mask)
			break;
	}

	sysaufs_kset = kset_create_and_add(AUFS_NAME, NULL, fs_kobj);
	if (unlikely(!sysaufs_kset))
		return -EINVAL;
	if (IS_ERR(sysaufs_kset))
		return PTR_ERR(sysaufs_kset);

	err = sysfs_create_group(&sysaufs_kset->kobj, sysaufs_attr_group);
	if (unlikely(err))
		kset_unregister(sysaufs_kset);

	return err;
}

102
fs/aufs/sysaufs.h Normal file
View File

@ -0,0 +1,102 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2005-2020 Junjiro R. Okajima
*
* This program, aufs is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* sysfs interface and mount lifetime management
*/
#ifndef __SYSAUFS_H__
#define __SYSAUFS_H__
#ifdef __KERNEL__
#include <linux/sysfs.h>
#include "module.h"
struct super_block;
struct au_sbinfo;
/*
 * A sysfs attribute of an sbinfo kobject together with its handler.
 * sysaufs_si_show() recovers this structure from the embedded 'attr' with
 * container_of() and calls ->show() with a seq_file and the super_block.
 */
struct sysaufs_si_attr {
struct attribute attr;
int (*show)(struct seq_file *seq, struct super_block *sb);
};
/* ---------------------------------------------------------------------- */
/* sysaufs.c */
/* objects and functions defined in sysaufs.c */
extern unsigned long sysaufs_si_mask;
extern struct kset *sysaufs_kset;
extern struct attribute *sysaufs_si_attrs[];
int sysaufs_si_init(struct au_sbinfo *sbinfo);
int __init sysaufs_init(void);
void sysaufs_fin(void);
/* ---------------------------------------------------------------------- */
/* some people don't like to show a kernel pointer */
/* the address of @sbinfo XOR-ed with sysaufs_si_mask */
static inline unsigned long sysaufs_si_id(struct au_sbinfo *sbinfo)
{
	unsigned long addr = (unsigned long)sbinfo;

	return addr ^ sysaufs_si_mask;
}
#define SysaufsSiNamePrefix "si_"
/* sizeof() counts the terminating NUL; 16 is added for the hex digits */
#define SysaufsSiNameLen (sizeof(SysaufsSiNamePrefix) + 16)

/*
 * Format the sysfs name of @sbinfo into @name, writing at most
 * SysaufsSiNameLen bytes including the terminator.
 */
static inline void sysaufs_name(struct au_sbinfo *sbinfo, char *name)
{
	unsigned long id = sysaufs_si_id(sbinfo);

	snprintf(name, SysaufsSiNameLen, SysaufsSiNamePrefix "%lx", id);
}
struct au_branch;
/*
 * With CONFIG_SYSFS the functions below are declared here and
 * sysaufs_brs_init() expands to nothing.  Without it, they become stubs
 * generated by the AuStub*() macros, sysaufs_attr_group is NULL, and
 * sysaufs_brs_init() stores 0 into sysaufs_brs.
 */
#ifdef CONFIG_SYSFS
/* sysfs.c */
extern struct attribute_group *sysaufs_attr_group;
int sysaufs_si_xi_path(struct seq_file *seq, struct super_block *sb);
ssize_t sysaufs_si_show(struct kobject *kobj, struct attribute *attr,
char *buf);
long au_brinfo_ioctl(struct file *file, unsigned long arg);
#ifdef CONFIG_COMPAT
long au_brinfo_compat_ioctl(struct file *file, unsigned long arg);
#endif
void sysaufs_br_init(struct au_branch *br);
void sysaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex);
void sysaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex);
#define sysaufs_brs_init() do {} while (0)
#else
#define sysaufs_attr_group NULL
AuStubInt0(sysaufs_si_xi_path, struct seq_file *seq, struct super_block *sb)
AuStub(ssize_t, sysaufs_si_show, return 0, struct kobject *kobj,
struct attribute *attr, char *buf)
AuStubVoid(sysaufs_br_init, struct au_branch *br)
AuStubVoid(sysaufs_brs_add, struct super_block *sb, aufs_bindex_t bindex)
AuStubVoid(sysaufs_brs_del, struct super_block *sb, aufs_bindex_t bindex)
static inline void sysaufs_brs_init(void)
{
sysaufs_brs = 0;
}
#endif /* CONFIG_SYSFS */
#endif /* __KERNEL__ */
#endif /* __SYSAUFS_H__ */

374
fs/aufs/sysfs.c Normal file
View File

@ -0,0 +1,374 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2005-2020 Junjiro R. Okajima
*
* This program, aufs is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* sysfs interface
*/
#include <linux/compat.h>
#include <linux/seq_file.h>
#include "aufs.h"
#ifdef CONFIG_AUFS_FS_MODULE
/* this entry violates the "one line per file" policy of sysfs */
/*
 * Copy the build-time configuration text into @buf.
 * Returns the number of bytes written, or -EFBIG when the text does not fit
 * in PAGE_SIZE.
 */
static ssize_t config_show(struct kobject *kobj, struct kobj_attribute *attr,
			   char *buf)
{
	ssize_t err;
	static char *conf =
/* this file is generated at compiling */
#include "conf.str"
		;

	/*
	 * Emit the text through "%s": it is generated data, and any '%' in
	 * it must not be interpreted as a conversion specification.
	 */
	err = snprintf(buf, PAGE_SIZE, "%s", conf);
	if (unlikely(err >= PAGE_SIZE))
		err = -EFBIG;
	return err;
}
static struct kobj_attribute au_config_attr = __ATTR_RO(config);
#endif
/*
 * Attributes of the attribute group; it holds the 'config' attribute only
 * when CONFIG_AUFS_FS_MODULE is defined, otherwise just the terminator.
 */
static struct attribute *au_attr[] = {
#ifdef CONFIG_AUFS_FS_MODULE
&au_config_attr.attr,
#endif
NULL, /* need to NULL terminate the list of attributes */
};
static struct attribute_group sysaufs_attr_group_body = {
.attrs = au_attr
};
/* exported pointer used by sysaufs_init() and sysaufs_fin() */
struct attribute_group *sysaufs_attr_group = &sysaufs_attr_group_body;
/* ---------------------------------------------------------------------- */
/*
 * ->show() handler of the xi_path attribute.
 * When the XINO option is set in the mount flags, print the path of si_xib
 * via au_xino_path() followed by a newline and return the result of
 * au_xino_path(); otherwise print nothing and return 0.
 * The caller must hold si_rwsem (SiMustAnyLock()).
 */
int sysaufs_si_xi_path(struct seq_file *seq, struct super_block *sb)
{
	int err;

	SiMustAnyLock(sb);

	if (!au_opt_test(au_mntflags(sb), XINO))
		return 0;

	err = au_xino_path(seq, au_sbi(sb)->si_xib);
	seq_putc(seq, '\n');

	return err;
}
/*
* the lifetime of branch is independent from the entry under sysfs.
* sysfs handles the lifetime of the entry, and never call ->show() after it is
* unlinked.
*/
/*
 * Print one per-branch sysfs entry into @seq while the root dentry is
 * read-locked.
 * For AuBrSysfs_BR the output is "<path>=<perm>\n", for AuBrSysfs_BRID it is
 * the branch id followed by a newline; any other @idx prints nothing.
 * Returns 0 on success, or -E2BIG when au_seq_path() failed or the seq_file
 * overflowed.
 */
static int sysaufs_si_br(struct seq_file *seq, struct super_block *sb,
			 aufs_bindex_t bindex, int idx)
{
	int err = 0;
	struct dentry *root;
	struct au_branch *br;

	AuDbg("b%d\n", bindex);

	root = sb->s_root;
	di_read_lock_parent(root, !AuLock_IR);
	br = au_sbr(sb, bindex);
	if (idx == AuBrSysfs_BR) {
		struct path path;
		au_br_perm_str_t perm;

		path.mnt = au_br_mnt(br);
		path.dentry = au_h_dptr(root, bindex);
		err = au_seq_path(seq, &path);
		if (!err) {
			au_optstr_br_perm(&perm, br->br_perm);
			seq_printf(seq, "=%s\n", perm.a);
		}
	} else if (idx == AuBrSysfs_BRID) {
		seq_printf(seq, "%d\n", br->br_id);
	}
	di_read_unlock(root, !AuLock_IR);

	/* every failure is reported as -E2BIG */
	if (unlikely(err || seq_has_overflowed(seq)))
		return -E2BIG;

	return err;
}
/* ---------------------------------------------------------------------- */
/*
 * Allocate a zeroed seq_file which writes into the caller's buffer @p of
 * @len bytes.  Returns the seq_file, or ERR_PTR(-ENOMEM).
 */
static struct seq_file *au_seq(char *p, ssize_t len)
{
	struct seq_file *seq = kzalloc(sizeof(*seq), GFP_NOFS);

	if (!seq)
		return ERR_PTR(-ENOMEM);

	/* mutex_init(&seq.lock); */
	seq->buf = p;
	seq->size = len;

	return seq; /* success */
}
/* name prefixes of the per-branch entries created by sysaufs_brs_add() */
#define SysaufsBr_PREFIX "br"
#define SysaufsBrid_PREFIX "brid"
/* todo: file size may exceed PAGE_SIZE */
/*
 * sysfs ->show() of an sbinfo kobject.
 * The attribute name is first looked up in sysaufs_si_attrs[]; when found,
 * its ->show() handler produces the output.  Otherwise the name has to start
 * with "brid" or "br" followed by a decimal branch index, which is handled
 * by sysaufs_si_br().
 * Returns the number of bytes placed in @buf, -EBUSY when si_rwsem cannot be
 * read-locked immediately, -ENOENT when the index is above the bottom
 * branch, -EFBIG when the output fills the whole page, or another negative
 * error.
 */
ssize_t sysaufs_si_show(struct kobject *kobj, struct attribute *attr,
char *buf)
{
ssize_t err;
int idx;
long l;
aufs_bindex_t bbot;
struct au_sbinfo *sbinfo;
struct super_block *sb;
struct seq_file *seq;
char *name;
struct attribute **cattr;
sbinfo = container_of(kobj, struct au_sbinfo, si_kobj);
sb = sbinfo->si_sb;
/*
* prevent a race condition between sysfs and aufs.
* for instance, sysfs_file_read() calls sysfs_get_active_two() which
* prohibits maintaining the sysfs entries.
* here we acquire read lock after sysfs_get_active_two().
* on the other hand, the remount process may maintain the sysfs/aufs
* entries after acquiring write lock.
* it can cause a deadlock.
* simply we gave up processing read here.
*/
err = -EBUSY;
if (unlikely(!si_noflush_read_trylock(sb)))
goto out;
seq = au_seq(buf, PAGE_SIZE);
err = PTR_ERR(seq);
if (IS_ERR(seq))
goto out_unlock;
name = (void *)attr->name;
/* fixed attributes first */
cattr = sysaufs_si_attrs;
while (*cattr) {
if (!strcmp(name, (*cattr)->name)) {
err = container_of(*cattr, struct sysaufs_si_attr, attr)
->show(seq, sb);
goto out_seq;
}
cattr++;
}
/* "brid" must be tested before "br" since the latter is its prefix */
if (!strncmp(name, SysaufsBrid_PREFIX,
sizeof(SysaufsBrid_PREFIX) - 1)) {
idx = AuBrSysfs_BRID;
name += sizeof(SysaufsBrid_PREFIX) - 1;
} else if (!strncmp(name, SysaufsBr_PREFIX,
sizeof(SysaufsBr_PREFIX) - 1)) {
idx = AuBrSysfs_BR;
name += sizeof(SysaufsBr_PREFIX) - 1;
} else
BUG();
/* the rest of the name is the branch index */
err = kstrtol(name, 10, &l);
if (!err) {
bbot = au_sbbot(sb);
if (l <= bbot)
err = sysaufs_si_br(seq, sb, (aufs_bindex_t)l, idx);
else
err = -ENOENT;
}
out_seq:
if (!err) {
err = seq->count;
/* sysfs limit */
if (unlikely(err == PAGE_SIZE))
err = -EFBIG;
}
au_kfree_rcu(seq);
out_unlock:
si_read_unlock(sb);
out:
return err;
}
/* ---------------------------------------------------------------------- */
/*
 * Report the branches of @sb to user space.
 *
 * When @arg is NULL, nothing is copied and the number of branches
 * (bottom index + 1) is returned.  Otherwise @arg is treated as an array
 * with one element per branch, and the id, the permission and the
 * NUL-terminated path of each branch are stored into its element.
 *
 * Returns 0 on success, -ENOMEM on allocation failure, -E2BIG when a path
 * does not fit into the 'path' area of an element, and -EFAULT for any other
 * failure inside the loop.
 */
static int au_brinfo(struct super_block *sb, union aufs_brinfo __user *arg)
{
	int err;
	int16_t brid;
	aufs_bindex_t bindex, bbot;
	size_t sz;
	char *buf;
	struct seq_file *seq;
	struct au_branch *br;

	si_read_lock(sb, AuLock_FLUSH);
	bbot = au_sbbot(sb);
	err = bbot + 1;
	if (!arg)
		goto out;

	err = -ENOMEM;
	buf = (void *)__get_free_page(GFP_NOFS);
	if (unlikely(!buf))
		goto out;

	seq = au_seq(buf, PAGE_SIZE);
	err = PTR_ERR(seq);
	if (IS_ERR(seq))
		goto out_buf;

	/* bytes available from 'path' to the end of a single element */
	sz = sizeof(*arg) - offsetof(union aufs_brinfo, path);
	for (bindex = 0; bindex <= bbot; bindex++, arg++) {
		/* VERIFY_WRITE */
		err = !access_ok(arg, sizeof(*arg));
		if (unlikely(err))
			break;

		br = au_sbr(sb, bindex);
		brid = br->br_id;
		BUILD_BUG_ON(sizeof(brid) != sizeof(arg->id));
		err = __put_user(brid, &arg->id);
		if (unlikely(err))
			break;

		BUILD_BUG_ON(sizeof(br->br_perm) != sizeof(arg->perm));
		err = __put_user(br->br_perm, &arg->perm);
		if (unlikely(err))
			break;

		err = au_seq_path(seq, &br->br_path);
		if (unlikely(err))
			break;
		seq_putc(seq, '\0');
		/*
		 * The page-sized scratch buffer can hold more than the 'path'
		 * area of one element, so a missing seq overflow alone does
		 * not prove that the string fits.  Refuse it instead of
		 * writing past the end of the user's element.
		 */
		if (unlikely(seq_has_overflowed(seq) || seq->count > sz)) {
			err = -E2BIG;
			goto out_seq;
		}
		err = copy_to_user(arg->path, seq->buf, seq->count);
		seq->count = 0;
		if (unlikely(err))
			break;
	}
	/* every failure which left the loop via 'break' becomes -EFAULT */
	if (unlikely(err))
		err = -EFAULT;

out_seq:
	au_kfree_rcu(seq);
out_buf:
	free_page((unsigned long)buf);
out:
	si_read_unlock(sb);
	return err;
}
/* ioctl entry: @arg is the user-space pointer handed to au_brinfo() */
long au_brinfo_ioctl(struct file *file, unsigned long arg)
{
	struct super_block *sb = file->f_path.dentry->d_sb;

	return au_brinfo(sb, (void __user *)arg);
}

#ifdef CONFIG_COMPAT
/* compat ioctl entry: @arg is converted with compat_ptr() first */
long au_brinfo_compat_ioctl(struct file *file, unsigned long arg)
{
	struct super_block *sb = file->f_path.dentry->d_sb;

	return au_brinfo(sb, compat_ptr(arg));
}
#endif
/* ---------------------------------------------------------------------- */
/*
 * Initialize every sysfs attribute embedded in @br: each one is read-only
 * (0444) and named by the 'name' buffer stored next to it.
 */
void sysaufs_br_init(struct au_branch *br)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(br->br_sysfs); i++) {
		struct au_brsysfs *br_sysfs = &br->br_sysfs[i];
		struct attribute *attr = &br_sysfs->attr;

		sysfs_attr_init(attr);
		attr->name = br_sysfs->name;
		attr->mode = 0444;
	}
}
/*
 * Remove the sysfs files of all branches from @bindex to the bottom branch.
 * Does nothing when sysaufs_brs is zero.
 */
void sysaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex)
{
	struct kobject *kobj;
	aufs_bindex_t bbot;

	if (!sysaufs_brs)
		return;

	kobj = &au_sbi(sb)->si_kobj;
	bbot = au_sbbot(sb);
	while (bindex <= bbot) {
		struct au_branch *br = au_sbr(sb, bindex);
		int i;

		for (i = 0; i < ARRAY_SIZE(br->br_sysfs); i++)
			sysfs_remove_file(kobj, &br->br_sysfs[i].attr);
		bindex++;
	}
}
/*
 * Name and create the sysfs files ("br<N>" and "brid<N>") of all branches
 * from @bindex to the bottom branch.  A failure to create a file is only
 * reported with pr_warn().  Does nothing when sysaufs_brs is zero.
 */
void sysaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex)
{
	struct kobject *kobj;
	aufs_bindex_t bbot;

	if (!sysaufs_brs)
		return;

	kobj = &au_sbi(sb)->si_kobj;
	bbot = au_sbbot(sb);
	while (bindex <= bbot) {
		struct au_branch *br = au_sbr(sb, bindex);
		struct au_brsysfs *entries = br->br_sysfs;
		int i, err;

		snprintf(entries[AuBrSysfs_BR].name, sizeof(entries->name),
			 SysaufsBr_PREFIX "%d", bindex);
		snprintf(entries[AuBrSysfs_BRID].name, sizeof(entries->name),
			 SysaufsBrid_PREFIX "%d", bindex);
		for (i = 0; i < ARRAY_SIZE(br->br_sysfs); i++) {
			err = sysfs_create_file(kobj, &entries[i].attr);
			if (unlikely(err))
				pr_warn("failed %s under sysfs(%d)\n",
					entries[i].name, err);
		}
		bindex++;
	}
}

149
fs/aufs/sysrq.c Normal file
View File

@ -0,0 +1,149 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2005-2020 Junjiro R. Okajima
*
* This program, aufs is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* magic sysrq handler
*/
/* #include <linux/sysrq.h> */
#include <linux/writeback.h>
#include "aufs.h"
/* ---------------------------------------------------------------------- */
/*
 * Dump the state of one aufs super_block with printk at KERN_WARNING:
 * the superblock itself, the inodes which have no dentry attached, and the
 * non-special files on the si_files list.
 * au_plevel is switched to KERN_WARNING for the duration of the dump and
 * restored at the end.
 */
static void sysrq_sb(struct super_block *sb)
{
char *plevel;
struct au_sbinfo *sbinfo;
struct file *file;
struct hlist_bl_head *files;
struct hlist_bl_node *pos;
struct au_finfo *finfo;
struct inode *i;
/* save the current print level; it is restored before returning */
plevel = au_plevel;
au_plevel = KERN_WARNING;
/* since we define pr_fmt, call printk directly */
#define pr(str) printk(KERN_WARNING AUFS_NAME ": " str)
sbinfo = au_sbi(sb);
printk(KERN_WARNING "si=%lx\n", sysaufs_si_id(sbinfo));
pr("superblock\n");
au_dpri_sb(sb);
#if 0 /* reserved */
do {
int err, i, j, ndentry;
struct au_dcsub_pages dpages;
struct au_dpage *dpage;
err = au_dpages_init(&dpages, GFP_ATOMIC);
if (unlikely(err))
break;
err = au_dcsub_pages(&dpages, sb->s_root, NULL, NULL);
if (!err)
for (i = 0; i < dpages.ndpage; i++) {
dpage = dpages.dpages + i;
ndentry = dpage->ndentry;
for (j = 0; j < ndentry; j++)
au_dpri_dentry(dpage->dentries[j]);
}
au_dpages_free(&dpages);
} while (0);
#endif
pr("isolated inode\n");
/* walk sb->s_inodes under s_inode_list_lock, each inode under i_lock */
spin_lock(&sb->s_inode_list_lock);
list_for_each_entry(i, &sb->s_inodes, i_sb_list) {
spin_lock(&i->i_lock);
if (hlist_empty(&i->i_dentry))
au_dpri_inode(i);
spin_unlock(&i->i_lock);
}
spin_unlock(&sb->s_inode_list_lock);
pr("files\n");
/* walk the si_files list with its head locked */
files = &au_sbi(sb)->si_files;
hlist_bl_lock(files);
hlist_bl_for_each_entry(finfo, pos, files, fi_hlist) {
umode_t mode;
file = finfo->fi_file;
mode = file_inode(file)->i_mode;
if (!special_file(mode))
au_dpri_file(file);
}
hlist_bl_unlock(files);
pr("done\n");
#undef pr
au_plevel = plevel;
}
/* ---------------------------------------------------------------------- */
/* module parameter */
/* the key is taken from the first character of this string */
static char *aufs_sysrq_key = "a";
module_param_named(sysrq, aufs_sysrq_key, charp, 0444);
MODULE_PARM_DESC(sysrq, "MagicSysRq key for " AUFS_NAME);

/*
 * SysRq handler: dump every super_block linked on au_sbilist, with the list
 * locked and lockdep switched off for the duration.
 */
static void au_sysrq(int key __maybe_unused)
{
	struct hlist_bl_node *node;
	struct au_sbinfo *sbi;

	lockdep_off();
	au_sbilist_lock();
	hlist_bl_for_each_entry(sbi, node, &au_sbilist, si_list) {
		sysrq_sb(sbi->si_sb);
	}
	au_sbilist_unlock();
	lockdep_on();
}
/*
 * SysRq operation registered by au_sysrq_init() and removed by
 * au_sysrq_fin(); au_sysrq() is the handler.
 */
static struct sysrq_key_op au_sysrq_op = {
.handler = au_sysrq,
.help_msg = "Aufs",
.action_msg = "Aufs",
.enable_mask = SYSRQ_ENABLE_DUMP
};
/* ---------------------------------------------------------------------- */
/*
 * Register the aufs SysRq key taken from the "sysrq" module parameter.
 * Returns -1 without registering when the key is not within 'a'..'z',
 * otherwise the result of register_sysrq_key().  Any failure is logged.
 */
int __init au_sysrq_init(void)
{
	const char key = *aufs_sysrq_key;
	int err;

	if (key < 'a' || key > 'z')
		err = -1;
	else
		err = register_sysrq_key(key, &au_sysrq_op);
	if (unlikely(err))
		pr_err("err %d, sysrq=%c\n", err, key);

	return err;
}
/* Unregister the aufs SysRq key; a failure is only logged. */
void au_sysrq_fin(void)
{
	const int err = unregister_sysrq_key(*aufs_sysrq_key, &au_sysrq_op);

	if (unlikely(err))
		pr_err("err %d (ignored)\n", err);
}

896
fs/aufs/vdir.c Normal file
View File

@ -0,0 +1,896 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2005-2020 Junjiro R. Okajima
*
* This program, aufs is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* virtual or vertical directory
*/
#include <linux/iversion.h>
#include "aufs.h"
static unsigned int calc_size(int nlen)
{
return ALIGN(sizeof(struct au_vdir_de) + nlen, sizeof(ino_t));
}
/*
 * Write the end mark (a zero name length) at @p.
 * Returns -1 when less than one empty entry fits between @p and @deblk_end.
 */
static int set_deblk_end(union au_vdir_deblk_p *p,
			 union au_vdir_deblk_p *deblk_end)
{
	if (deblk_end->deblk - p->deblk < calc_size(0))
		return -1; /* error */

	p->de->de_str.len = 0;
	/* smp_mb(); */
	return 0;
}
/*
 * returns true or false.
 * @p is at the end when no room is left for an empty entry before
 * @deblk_end, or when the entry at @p has a zero name length.
 */
static int is_deblk_end(union au_vdir_deblk_p *p,
			union au_vdir_deblk_p *deblk_end)
{
	if (deblk_end->deblk - p->deblk < calc_size(0))
		return 1;

	return !p->de->de_str.len;
}
/* the block stored in the last used slot of vd_deblk[] */
static unsigned char *last_deblk(struct au_vdir *vdir)
{
	unsigned char **blocks = vdir->vd_deblk;

	return blocks[vdir->vd_nblk - 1];
}
/* ---------------------------------------------------------------------- */
/*
 * estimate the appropriate size for name hash table:
 * @sz divided by 1024, not less than AUFS_RDHASH_DEF and not more than
 * UINT_MAX.
 */
unsigned int au_rdhash_est(loff_t sz)
{
	sz >>= 10;
	if (sz < AUFS_RDHASH_DEF)
		return AUFS_RDHASH_DEF;
	if (sz < UINT_MAX)
		return sz;

	return UINT_MAX;
}
/*
 * Allocate and initialize a name hash table of @num_hash buckets.
 * Returns 0 on success or -ENOMEM.
 *
 * the allocated memory has to be freed by
 * au_nhash_wh_free() or au_nhash_de_free().
 */
int au_nhash_alloc(struct au_nhash *nhash, unsigned int num_hash, gfp_t gfp)
{
	struct hlist_head *head;
	unsigned int u;

	/*
	 * kmalloc_array() fails cleanly instead of letting the
	 * "element size * count" multiplication wrap around.
	 */
	head = kmalloc_array(num_hash, sizeof(*nhash->nh_head), gfp);
	if (unlikely(!head))
		return -ENOMEM;

	nhash->nh_num = num_hash;
	nhash->nh_head = head;
	for (u = 0; u < num_hash; u++)
		INIT_HLIST_HEAD(head + u);

	return 0; /* success */
}
/*
 * Debugging aid.  The body is disabled by "#if 0", so this function does
 * nothing as compiled; the disabled code counts the nodes on @head and prints
 * the number.
 */
static void nhash_count(struct hlist_head *head)
{
#if 0 /* debugging */
unsigned long n;
struct hlist_node *pos;
n = 0;
hlist_for_each(pos, head)
n++;
pr_info("%lu\n", n);
#endif
}
static void au_nhash_wh_do_free(struct hlist_head *head)
{
struct au_vdir_wh *pos;
struct hlist_node *node;
hlist_for_each_entry_safe(pos, node, head, wh_hash)
au_kfree_rcu(pos);
}
static void au_nhash_de_do_free(struct hlist_head *head)
{
struct au_vdir_dehstr *pos;
struct hlist_node *node;
hlist_for_each_entry_safe(pos, node, head, hash)
au_cache_free_vdir_dehstr(pos);
}
/*
 * Call @free for every bucket of @nhash and then free the bucket array.
 * Nothing is done when the table has no bucket.
 */
static void au_nhash_do_free(struct au_nhash *nhash,
			     void (*free)(struct hlist_head *head))
{
	struct hlist_head *head, *end;

	if (!nhash->nh_num)
		return;

	head = nhash->nh_head;
	end = head + nhash->nh_num;
	for (; head < end; head++) {
		nhash_count(head);
		free(head);
	}
	au_kfree_try_rcu(nhash->nh_head);
}
/* free a hash table whose entries are struct au_vdir_wh (whiteouts) */
void au_nhash_wh_free(struct au_nhash *whlist)
{
au_nhash_do_free(whlist, au_nhash_wh_do_free);
}
/* free a hash table whose entries are struct au_vdir_dehstr */
static void au_nhash_de_free(struct au_nhash *delist)
{
au_nhash_do_free(delist, au_nhash_de_do_free);
}
/* ---------------------------------------------------------------------- */
/*
 * returns 1 when @whlist holds more than @limit entries whose branch index
 * is @btgt, otherwise 0.
 */
int au_nhash_test_longer_wh(struct au_nhash *whlist, aufs_bindex_t btgt,
			    int limit)
{
	int found;
	unsigned int u;
	const unsigned int nbucket = whlist->nh_num;
	struct hlist_head *head;
	struct au_vdir_wh *wh;

	found = 0;
	for (u = 0; u < nbucket; u++) {
		head = whlist->nh_head + u;
		hlist_for_each_entry(wh, head, wh_hash) {
			if (wh->wh_bindex != btgt)
				continue;
			if (++found > limit)
				return 1;
		}
	}

	return 0;
}
/*
 * Select the bucket for @name: the sum of (at most) its first eight bytes,
 * modulo the number of buckets.
 */
static struct hlist_head *au_name_hash(struct au_nhash *nhash,
				       unsigned char *name,
				       unsigned int len)
{
	unsigned int sum, i;
	const unsigned int used = len > 8 ? 8 : len;

	AuDebugOn(!nhash->nh_num || !nhash->nh_head);

	sum = 0;
	for (i = 0; i < used; i++)
		sum += name[i];

	return &nhash->nh_head[sum % nhash->nh_num];
}
/* returns true when @str holds exactly the @nlen bytes at @name */
static int au_nhash_test_name(struct au_vdir_destr *str, const char *name,
			      int nlen)
{
	if (str->len != nlen)
		return 0;

	return !memcmp(str->name, name, nlen);
}
/* returns found or not: is @name already recorded in @whlist? */
int au_nhash_test_known_wh(struct au_nhash *whlist, char *name, int nlen)
{
	struct hlist_head *bucket;
	struct au_vdir_wh *wh;
	struct au_vdir_destr *str;

	bucket = au_name_hash(whlist, name, nlen);
	hlist_for_each_entry(wh, bucket, wh_hash) {
		str = &wh->wh_str;
		AuDbg("%.*s\n", str->len, str->name);
		if (au_nhash_test_name(str, name, nlen))
			return 1;
	}

	return 0;
}
/* returns found(true) or not: is @name already recorded in @delist? */
static int test_known(struct au_nhash *delist, char *name, int nlen)
{
	struct hlist_head *bucket;
	struct au_vdir_dehstr *dehstr;
	struct au_vdir_destr *str;

	bucket = au_name_hash(delist, name, nlen);
	hlist_for_each_entry(dehstr, bucket, hash) {
		str = dehstr->str;
		AuDbg("%.*s\n", str->len, str->name);
		if (au_nhash_test_name(str, name, nlen))
			return 1;
	}

	return 0;
}
/*
 * Store the inode number and the d_type in a whiteout entry.
 * Without CONFIG_AUFS_SHWH the body is empty and nothing is stored.
 */
static void au_shwh_init_wh(struct au_vdir_wh *wh, ino_t ino,
unsigned char d_type)
{
#ifdef CONFIG_AUFS_SHWH
wh->wh_ino = ino;
wh->wh_type = d_type;
#endif
}
/* ---------------------------------------------------------------------- */
/*
 * Allocate a whiteout entry for @name (@nlen bytes) found on the branch
 * @bindex and link it to @whlist.  @ino and @d_type are recorded only when
 * @shwh is set.  Returns 0 on success or -ENOMEM.
 */
int au_nhash_append_wh(struct au_nhash *whlist, char *name, int nlen, ino_t ino,
		       unsigned int d_type, aufs_bindex_t bindex,
		       unsigned char shwh)
{
	struct au_vdir_wh *wh;
	struct au_vdir_destr *str;

	AuDbg("%.*s\n", nlen, name);
	AuDebugOn(!whlist->nh_num || !whlist->nh_head);

	wh = kmalloc(sizeof(*wh) + nlen, GFP_NOFS);
	if (unlikely(!wh))
		return -ENOMEM;

	wh->wh_bindex = bindex;
	if (shwh)
		au_shwh_init_wh(wh, ino, d_type);
	str = &wh->wh_str;
	str->len = nlen;
	memcpy(str->name, name, nlen);
	hlist_add_head(&wh->wh_hash, au_name_hash(whlist, name, nlen));
	/* smp_mb(); */

	return 0;
}
static int append_deblk(struct au_vdir *vdir)
{
int err;
unsigned long ul;
const unsigned int deblk_sz = vdir->vd_deblk_sz;
union au_vdir_deblk_p p, deblk_end;
unsigned char **o;
err = -ENOMEM;
o = au_krealloc(vdir->vd_deblk, sizeof(*o) * (vdir->vd_nblk + 1),
GFP_NOFS, /*may_shrink*/0);
if (unlikely(!o))
goto out;
vdir->vd_deblk = o;
p.deblk = kmalloc(deblk_sz, GFP_NOFS);
if (p.deblk) {
ul = vdir->vd_nblk++;
vdir->vd_deblk[ul] = p.deblk;
vdir->vd_last.ul = ul;
vdir->vd_last.p.deblk = p.deblk;
deblk_end.deblk = p.deblk + deblk_sz;
err = set_deblk_end(&p, &deblk_end);
}
out:
return err;
}
/*
 * Append one directory entry (@name, @ino, @d_type) at the current write
 * position vdir->vd_last.p and link a hash entry pointing at the stored name
 * to @delist.  A new block is appended first when the entry does not fit in
 * the rest of the last block, and afterwards when no room is left for the end
 * mark.  Returns 0 on success or a negative value.
 */
static int append_de(struct au_vdir *vdir, char *name, int nlen, ino_t ino,
unsigned int d_type, struct au_nhash *delist)
{
int err;
unsigned int sz;
const unsigned int deblk_sz = vdir->vd_deblk_sz;
union au_vdir_deblk_p p, *room, deblk_end;
struct au_vdir_dehstr *dehstr;
p.deblk = last_deblk(vdir);
deblk_end.deblk = p.deblk + deblk_sz;
/* room aliases vd_last.p, so append_deblk() moves it to the new block */
room = &vdir->vd_last.p;
AuDebugOn(room->deblk < p.deblk || deblk_end.deblk <= room->deblk
|| !is_deblk_end(room, &deblk_end));
sz = calc_size(nlen);
if (unlikely(sz > deblk_end.deblk - room->deblk)) {
/* the entry does not fit in the rest of this block */
err = append_deblk(vdir);
if (unlikely(err))
goto out;
p.deblk = last_deblk(vdir);
deblk_end.deblk = p.deblk + deblk_sz;
/* smp_mb(); */
AuDebugOn(room->deblk != p.deblk);
}
err = -ENOMEM;
dehstr = au_cache_alloc_vdir_dehstr();
if (unlikely(!dehstr))
goto out;
/* the hash entry refers to the name stored inside the block */
dehstr->str = &room->de->de_str;
hlist_add_head(&dehstr->hash, au_name_hash(delist, name, nlen));
room->de->de_ino = ino;
room->de->de_type = d_type;
room->de->de_str.len = nlen;
memcpy(room->de->de_str.name, name, nlen);
err = 0;
room->deblk += sz;
/* no room for the end mark means the block is full, start a new one */
if (unlikely(set_deblk_end(room, &deblk_end)))
err = append_deblk(vdir);
/* smp_mb(); */
out:
return err;
}
/* ---------------------------------------------------------------------- */
/* free all the blocks of @vdir, the array of blocks and @vdir itself */
void au_vdir_free(struct au_vdir *vdir)
{
	unsigned char **blk;

	for (blk = vdir->vd_deblk; vdir->vd_nblk--; blk++)
		au_kfree_try_rcu(*blk);
	au_kfree_try_rcu(vdir->vd_deblk);
	au_cache_free_vdir(vdir);
}
/*
 * Allocate a vdir which holds one empty block.
 * The block size is the si_rdblk value of the super block, or au_dir_size()
 * of @file when si_rdblk is zero.
 * Returns the new vdir or an ERR_PTR().
 */
static struct au_vdir *alloc_vdir(struct file *file)
{
	struct au_vdir *vdir;
	struct super_block *sb;
	int err;

	sb = file->f_path.dentry->d_sb;
	SiMustAnyLock(sb);

	err = -ENOMEM;
	vdir = au_cache_alloc_vdir();
	if (unlikely(!vdir))
		goto out;

	vdir->vd_deblk = kzalloc(sizeof(*vdir->vd_deblk), GFP_NOFS);
	if (unlikely(!vdir->vd_deblk))
		goto out_free;

	vdir->vd_deblk_sz = au_sbi(sb)->si_rdblk;
	if (!vdir->vd_deblk_sz) {
		/* estimate the appropriate size for deblk */
		vdir->vd_deblk_sz = au_dir_size(file, /*dentry*/NULL);
		/* pr_info("vd_deblk_sz %u\n", vdir->vd_deblk_sz); */
	}
	vdir->vd_nblk = 0;
	vdir->vd_version = 0;
	vdir->vd_jiffy = 0;
	err = append_deblk(vdir);
	if (!err)
		return vdir; /* success */

	/*
	 * append_deblk() may fail after it linked the new block (when
	 * set_deblk_end() fails), so release the blocks counted by vd_nblk
	 * too, not only the array.
	 */
	au_vdir_free(vdir);
	goto out;

out_free:
	au_cache_free_vdir(vdir);
out:
	return ERR_PTR(err);
}
static int reinit_vdir(struct au_vdir *vdir)
{
int err;
union au_vdir_deblk_p p, deblk_end;
while (vdir->vd_nblk > 1) {
au_kfree_try_rcu(vdir->vd_deblk[vdir->vd_nblk - 1]);
/* vdir->vd_deblk[vdir->vd_nblk - 1] = NULL; */
vdir->vd_nblk--;
}
p.deblk = vdir->vd_deblk[0];
deblk_end.deblk = p.deblk + vdir->vd_deblk_sz;
err = set_deblk_end(&p, &deblk_end);
/* keep vd_dblk_sz */
vdir->vd_last.ul = 0;
vdir->vd_last.p.deblk = vdir->vd_deblk[0];
vdir->vd_version = 0;
vdir->vd_jiffy = 0;
/* smp_mb(); */
return err;
}
/* ---------------------------------------------------------------------- */
/*
 * flags for fillvdir_arg.flags
 * CALLED: set by fillvdir() every time it is called
 * WHABLE: fillvdir() handles the whiteout names only when this is set
 * SHWH: set when the SHWH mount option is enabled, and defined as zero when
 *	CONFIG_AUFS_SHWH is disabled
 */
#define AuFillVdir_CALLED 1
#define AuFillVdir_WHABLE (1 << 1)
#define AuFillVdir_SHWH (1 << 2)
#define au_ftest_fillvdir(flags, name) ((flags) & AuFillVdir_##name)
#define au_fset_fillvdir(flags, name) \
do { (flags) |= AuFillVdir_##name; } while (0)
#define au_fclr_fillvdir(flags, name) \
do { (flags) &= ~AuFillVdir_##name; } while (0)
#ifndef CONFIG_AUFS_SHWH
#undef AuFillVdir_SHWH
#define AuFillVdir_SHWH 0
#endif
/*
 * The argument of fillvdir().  The callback receives @ctx and gets this
 * structure back by container_of().
 */
struct fillvdir_arg {
/* passed to vfsub_iterate_dir(), its actor is fillvdir() */
struct dir_context ctx;
/* the aufs directory being read */
struct file *file;
/* the vdir to be filled */
struct au_vdir *vdir;
/* the names already stored in vdir */
struct au_nhash delist;
/* the whiteout names found so far */
struct au_nhash whlist;
/* the branch being read */
aufs_bindex_t bindex;
/* AuFillVdir_xxx */
unsigned int flags;
/* the result of the last fillvdir() call */
int err;
};
/*
 * The dir_context actor used while reading a lower directory.
 * - A name which is not longer than the whiteout prefix, or which does not
 *   begin with it, is appended to the vdir unless it is already known in
 *   delist or whlist.
 * - A name which begins with the whiteout prefix is handled only when the
 *   WHABLE flag is set: the prefix is stripped and the rest is recorded in
 *   whlist unless it is already there.
 * The result is stored in arg->err and returned.  vd_jiffy is updated when
 * the result is zero.
 */
static int fillvdir(struct dir_context *ctx, const char *__name, int nlen,
loff_t offset __maybe_unused, u64 h_ino,
unsigned int d_type)
{
struct fillvdir_arg *arg = container_of(ctx, struct fillvdir_arg, ctx);
char *name = (void *)__name;
struct super_block *sb;
ino_t ino;
const unsigned char shwh = !!au_ftest_fillvdir(arg->flags, SHWH);
arg->err = 0;
sb = arg->file->f_path.dentry->d_sb;
/* tell au_do_read_vdir() that at least one entry was passed */
au_fset_fillvdir(arg->flags, CALLED);
/* smp_mb(); */
if (nlen <= AUFS_WH_PFX_LEN
|| memcmp(name, AUFS_WH_PFX, AUFS_WH_PFX_LEN)) {
if (test_known(&arg->delist, name, nlen)
|| au_nhash_test_known_wh(&arg->whlist, name, nlen))
goto out; /* already exists or whiteouted */
arg->err = au_ino(sb, arg->bindex, h_ino, d_type, &ino);
if (!arg->err) {
if (unlikely(nlen > AUFS_MAX_NAMELEN))
d_type = DT_UNKNOWN;
arg->err = append_de(arg->vdir, name, nlen, ino,
d_type, &arg->delist);
}
} else if (au_ftest_fillvdir(arg->flags, WHABLE)) {
/* a whiteout, drop the prefix and remember the rest */
name += AUFS_WH_PFX_LEN;
nlen -= AUFS_WH_PFX_LEN;
if (au_nhash_test_known_wh(&arg->whlist, name, nlen))
goto out; /* already whiteouted */
ino = 0; /* just to suppress a warning */
if (shwh)
arg->err = au_wh_ino(sb, arg->bindex, h_ino, d_type,
&ino);
if (!arg->err) {
/*
 * NOTE(review): this test has the opposite sense of the
 * "nlen > AUFS_MAX_NAMELEN" test in the branch above, and nlen was
 * already reduced by AUFS_WH_PFX_LEN here -- confirm it is intended.
 */
if (nlen <= AUFS_MAX_NAMELEN + AUFS_WH_PFX_LEN)
d_type = DT_UNKNOWN;
arg->err = au_nhash_append_wh
(&arg->whlist, name, nlen, ino, d_type,
arg->bindex, shwh);
}
}
out:
if (!arg->err)
arg->vdir->vd_jiffy = jiffies;
/* smp_mb(); */
AuTraceErr(arg->err);
return arg->err;
}
/*
 * Append every whiteout recorded in @whlist to @vdir as a visible entry
 * whose name is the whiteout prefix followed by the recorded name.
 * Returns 0 on success or a negative value.  The first failure of
 * append_de() stops the whole walk and is returned.
 * Without CONFIG_AUFS_SHWH this does nothing and returns 0.
 */
static int au_handle_shwh(struct super_block *sb, struct au_vdir *vdir,
			  struct au_nhash *whlist, struct au_nhash *delist)
{
#ifdef CONFIG_AUFS_SHWH
	int err;
	unsigned int nh, u;
	struct hlist_head *head;
	struct au_vdir_wh *pos;
	struct hlist_node *n;
	char *p, *o;
	struct au_vdir_destr *destr;

	AuDebugOn(!au_opt_test(au_mntflags(sb), SHWH));

	err = -ENOMEM;
	/* a page to build "<prefix><name>"; o is its head, p follows the prefix */
	o = p = (void *)__get_free_page(GFP_NOFS);
	if (unlikely(!p))
		goto out;

	err = 0;
	nh = whlist->nh_num;
	memcpy(p, AUFS_WH_PFX, AUFS_WH_PFX_LEN);
	p += AUFS_WH_PFX_LEN;
	for (u = 0; u < nh; u++) {
		head = whlist->nh_head + u;
		hlist_for_each_entry_safe(pos, n, head, wh_hash) {
			destr = &pos->wh_str;
			memcpy(p, destr->name, destr->len);
			err = append_de(vdir, o, destr->len + AUFS_WH_PFX_LEN,
					pos->wh_ino, pos->wh_type, delist);
			/*
			 * leave both loops, otherwise a later bucket would
			 * overwrite the error by a success
			 */
			if (unlikely(err))
				goto out_page;
		}
	}

out_page:
	free_page((unsigned long)o);
out:
	AuTraceErr(err);
	return err;
#else
	return 0;
#endif
}
/*
 * Fill arg->vdir by reading the lower directories of arg->file from the top
 * branch to the bottom one.  Two temporary name hash tables (the stored
 * names and the whiteouts) are allocated here and freed before returning.
 * Returns 0 on success or a negative value.
 */
static int au_do_read_vdir(struct fillvdir_arg *arg)
{
int err;
unsigned int rdhash;
loff_t offset;
aufs_bindex_t bbot, bindex, btop;
unsigned char shwh;
struct file *hf, *file;
struct super_block *sb;
file = arg->file;
sb = file->f_path.dentry->d_sb;
SiMustAnyLock(sb);
/* the number of buckets, estimated from the dir size when si_rdhash is 0 */
rdhash = au_sbi(sb)->si_rdhash;
if (!rdhash)
rdhash = au_rdhash_est(au_dir_size(file, /*dentry*/NULL));
err = au_nhash_alloc(&arg->delist, rdhash, GFP_NOFS);
if (unlikely(err))
goto out;
err = au_nhash_alloc(&arg->whlist, rdhash, GFP_NOFS);
if (unlikely(err))
goto out_delist;
err = 0;
arg->flags = 0;
shwh = 0;
if (au_opt_test(au_mntflags(sb), SHWH)) {
shwh = 1;
au_fset_fillvdir(arg->flags, SHWH);
}
btop = au_fbtop(file);
bbot = au_fbbot_dir(file);
for (bindex = btop; !err && bindex <= bbot; bindex++) {
hf = au_hf_dir(file, bindex);
if (!hf)
continue;
/* rewind the lower dir, any non-zero result stops the loop */
offset = vfsub_llseek(hf, 0, SEEK_SET);
err = offset;
if (unlikely(offset))
break;
arg->bindex = bindex;
/*
 * whiteouts are handled in shwh mode, or when this is not the bottom
 * branch and au_br_whable() is true for the branch permission
 */
au_fclr_fillvdir(arg->flags, WHABLE);
if (shwh
|| (bindex != bbot
&& au_br_whable(au_sbr_perm(sb, bindex))))
au_fset_fillvdir(arg->flags, WHABLE);
/* repeat until an error or a pass in which fillvdir() is never called */
do {
arg->err = 0;
au_fclr_fillvdir(arg->flags, CALLED);
/* smp_mb(); */
err = vfsub_iterate_dir(hf, &arg->ctx);
if (err >= 0)
err = arg->err;
} while (!err && au_ftest_fillvdir(arg->flags, CALLED));
/*
* dir_relax() may be good for concurrency, but aufs should not
* use it since it will cause a lockdep problem.
*/
}
if (!err && shwh)
err = au_handle_shwh(sb, arg->vdir, &arg->whlist, &arg->delist);
au_nhash_wh_free(&arg->whlist);
out_delist:
au_nhash_de_free(&arg->delist);
out:
return err;
}
/*
 * Make sure that the inode of @file has a filled vdir.
 * - When the inode has no vdir, a new one is allocated and read.
 * - When it has one and @may_read is set, it is re-read if the inode version
 *   differs from vd_version or if jiffies is after vd_jiffy + si_rdcache.
 * - Otherwise the existing vdir is left as it is.
 * Returns 0 on success or a negative value.
 */
static int read_vdir(struct file *file, int may_read)
{
int err;
unsigned long expire;
unsigned char do_read;
struct fillvdir_arg arg = {
.ctx = {
.actor = fillvdir
}
};
struct inode *inode;
struct au_vdir *vdir, *allocated;
err = 0;
inode = file_inode(file);
IMustLock(inode);
IiMustWriteLock(inode);
SiMustAnyLock(inode->i_sb);
/* allocated stays NULL unless a new vdir is created here */
allocated = NULL;
do_read = 0;
expire = au_sbi(inode->i_sb)->si_rdcache;
vdir = au_ivdir(inode);
if (!vdir) {
do_read = 1;
vdir = alloc_vdir(file);
err = PTR_ERR(vdir);
if (IS_ERR(vdir))
goto out;
err = 0;
allocated = vdir;
} else if (may_read
&& (!inode_eq_iversion(inode, vdir->vd_version)
|| time_after(jiffies, vdir->vd_jiffy + expire))) {
/* the cached vdir is stale, empty it and read again */
do_read = 1;
err = reinit_vdir(vdir);
if (unlikely(err))
goto out;
}
if (!do_read)
return 0; /* success */
arg.file = file;
arg.vdir = vdir;
err = au_do_read_vdir(&arg);
if (!err) {
/* file->f_pos = 0; */ /* todo: ctx->pos? */
vdir->vd_version = inode_query_iversion(inode);
vdir->vd_last.ul = 0;
vdir->vd_last.p.deblk = vdir->vd_deblk[0];
/* a new vdir is attached to the inode only after a successful read */
if (allocated)
au_set_ivdir(inode, allocated);
} else if (allocated)
au_vdir_free(allocated);
out:
return err;
}
/*
 * Copy all the blocks, the version, the timestamp and the position of @src
 * to @tgt.  @tgt must hold exactly one block on entry.
 * Returns 0 on success.  On an allocation failure @tgt is emptied by
 * reinit_vdir() and -ENOMEM is returned.
 */
static int copy_vdir(struct au_vdir *tgt, struct au_vdir *src)
{
	int err, rerr;
	unsigned long ul, n;
	const unsigned int deblk_sz = src->vd_deblk_sz;

	AuDebugOn(tgt->vd_nblk != 1);

	err = -ENOMEM;
	if (tgt->vd_nblk < src->vd_nblk) {
		unsigned char **p;

		p = au_krealloc(tgt->vd_deblk, sizeof(*p) * src->vd_nblk,
				GFP_NOFS, /*may_shrink*/0);
		if (unlikely(!p))
			goto out;
		tgt->vd_deblk = p;
	}

	if (tgt->vd_deblk_sz != deblk_sz) {
		unsigned char *p;

		p = au_krealloc(tgt->vd_deblk[0], deblk_sz, GFP_NOFS,
				/*may_shrink*/1);
		if (unlikely(!p))
			goto out;
		tgt->vd_deblk[0] = p;
		/*
		 * record the new size only after the block really has it,
		 * otherwise a failure would leave a size which does not
		 * match the first block
		 */
		tgt->vd_deblk_sz = deblk_sz;
	}
	memcpy(tgt->vd_deblk[0], src->vd_deblk[0], deblk_sz);
	tgt->vd_version = src->vd_version;
	tgt->vd_jiffy = src->vd_jiffy;

	n = src->vd_nblk;
	for (ul = 1; ul < n; ul++) {
		tgt->vd_deblk[ul] = kmemdup(src->vd_deblk[ul], deblk_sz,
					    GFP_NOFS);
		if (unlikely(!tgt->vd_deblk[ul]))
			goto out;
		/* count it now, reinit_vdir() frees the counted blocks */
		tgt->vd_nblk++;
	}
	tgt->vd_nblk = n;
	/* the same position as src: the same block and the same offset in it */
	tgt->vd_last.ul = src->vd_last.ul;
	tgt->vd_last.p.deblk = tgt->vd_deblk[tgt->vd_last.ul];
	tgt->vd_last.p.deblk += src->vd_last.p.deblk
		- src->vd_deblk[src->vd_last.ul];
	/* smp_mb(); */
	return 0; /* success */

out:
	rerr = reinit_vdir(tgt);
	BUG_ON(rerr);
	return err;
}
/*
 * Prepare the vdir cache of @file.
 * The vdir of the inode is refreshed first (re-reading is allowed only when
 * f_pos is zero).  Then the per-file cache is allocated if @file has none, or
 * emptied if f_pos is zero and its version differs from f_version, and
 * the vdir of the inode is copied to it.  In any other case the existing
 * cache is kept.
 * Returns 0 on success or a negative value.
 */
int au_vdir_init(struct file *file)
{
	int err;
	struct inode *inode;
	struct au_vdir *cache, *fresh;

	/* test file->f_pos here instead of ctx->pos */
	err = read_vdir(file, !file->f_pos);
	if (unlikely(err))
		return err;

	fresh = NULL;
	cache = au_fvdir_cache(file);
	if (cache) {
		/* test file->f_pos here instead of ctx->pos */
		if (file->f_pos || cache->vd_version == file->f_version)
			return 0; /* success */
		err = reinit_vdir(cache);
		if (unlikely(err))
			return err;
	} else {
		cache = alloc_vdir(file);
		if (IS_ERR(cache))
			return PTR_ERR(cache);
		fresh = cache;
	}

	inode = file_inode(file);
	err = copy_vdir(cache, au_ivdir(inode));
	if (err) {
		if (fresh)
			au_vdir_free(fresh);
		return err;
	}

	file->f_version = inode_query_iversion(inode);
	if (fresh)
		au_set_fvdir_cache(file, fresh);

	return 0;
}
/*
 * Convert the current position of @vdir (the block index and the pointer
 * into the block) to an offset: block index * block size + offset in block.
 */
static loff_t calc_offset(struct au_vdir *vdir)
{
	loff_t offset;
	union au_vdir_deblk_p p;

	p.deblk = vdir->vd_deblk[vdir->vd_last.ul];
	/* multiply as loff_t, in the same way as seek_vdir() does */
	offset = vdir->vd_last.ul;
	offset *= vdir->vd_deblk_sz;
	offset += vdir->vd_last.p.deblk - p.deblk;

	return offset;
}
/*
 * Move the current position of the vdir cache of @file to ctx->pos.
 * returns true or false: true when the position was set (the cache is
 * already there, ctx->pos is zero, or an entry at or after ctx->pos was
 * found), false when ctx->pos is beyond the stored entries.
 */
static int seek_vdir(struct file *file, struct dir_context *ctx)
{
	int valid;
	unsigned int deblk_sz;
	unsigned long ul, n;
	loff_t offset;
	union au_vdir_deblk_p p, deblk_end;
	struct au_vdir *vdir_cache;

	valid = 1;
	vdir_cache = au_fvdir_cache(file);
	offset = calc_offset(vdir_cache);
	AuDbg("offset %lld\n", offset);
	if (ctx->pos == offset)
		goto out;

	/* rewind first, it is enough when ctx->pos is zero */
	vdir_cache->vd_last.ul = 0;
	vdir_cache->vd_last.p.deblk = vdir_cache->vd_deblk[0];
	if (!ctx->pos)
		goto out;

	valid = 0;
	deblk_sz = vdir_cache->vd_deblk_sz;
	/* the index of the block which ctx->pos falls in */
	ul = div64_u64(ctx->pos, deblk_sz);
	AuDbg("ul %lu\n", ul);
	if (ul >= vdir_cache->vd_nblk)
		goto out;

	n = vdir_cache->vd_nblk;
	for (; ul < n; ul++) {
		p.deblk = vdir_cache->vd_deblk[ul];
		deblk_end.deblk = p.deblk + deblk_sz;
		offset = ul;
		offset *= deblk_sz;
		/* skip the entries which are before ctx->pos */
		while (!is_deblk_end(&p, &deblk_end) && offset < ctx->pos) {
			unsigned int l;

			l = calc_size(p.de->de_str.len);
			offset += l;
			p.deblk += l;
		}
		if (!is_deblk_end(&p, &deblk_end)) {
			valid = 1;
			vdir_cache->vd_last.ul = ul;
			vdir_cache->vd_last.p = p;
			break;
		}
	}

out:
	/* smp_mb(); */
	if (!valid)
		AuDbg("valid %d\n", valid);
	return valid;
}
int au_vdir_fill_de(struct file *file, struct dir_context *ctx)
{
unsigned int l, deblk_sz;
union au_vdir_deblk_p deblk_end;
struct au_vdir *vdir_cache;
struct au_vdir_de *de;
if (!seek_vdir(file, ctx))
return 0;
vdir_cache = au_fvdir_cache(file);
deblk_sz = vdir_cache->vd_deblk_sz;
while (1) {
deblk_end.deblk = vdir_cache->vd_deblk[vdir_cache->vd_last.ul];
deblk_end.deblk += deblk_sz;
while (!is_deblk_end(&vdir_cache->vd_last.p, &deblk_end)) {
de = vdir_cache->vd_last.p.de;
AuDbg("%.*s, off%lld, i%lu, dt%d\n",
de->de_str.len, de->de_str.name, ctx->pos,
(unsigned long)de->de_ino, de->de_type);
if (unlikely(!dir_emit(ctx, de->de_str.name,
de->de_str.len, de->de_ino,
de->de_type))) {
/* todo: ignore the error caused by udba? */
/* return err; */
return 0;
}
l = calc_size(de->de_str.len);
vdir_cache->vd_last.p.deblk += l;
ctx->pos += l;
}
if (vdir_cache->vd_last.ul < vdir_cache->vd_nblk - 1) {
vdir_cache->vd_last.ul++;
vdir_cache->vd_last.p.deblk
= vdir_cache->vd_deblk[vdir_cache->vd_last.ul];
ctx->pos = deblk_sz * vdir_cache->vd_last.ul;
continue;
}
break;
}
/* smp_mb(); */
return 0;
}

895
fs/aufs/vfsub.c Normal file
View File

@ -0,0 +1,895 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2005-2020 Junjiro R. Okajima
*
* This program, aufs is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* sub-routines for VFS
*/
#include <linux/mnt_namespace.h>
#include <linux/namei.h>
#include <linux/nsproxy.h>
#include <linux/security.h>
#include <linux/splice.h>
#include "aufs.h"
#ifdef CONFIG_AUFS_BR_FUSE
/*
 * returns -EACCES when @h_sb is fuse, au_userns is set and @mnt is not in
 * the mount namespace of the current task, otherwise 0.
 */
int vfsub_test_mntns(struct vfsmount *mnt, struct super_block *h_sb)
{
	if (au_test_fuse(h_sb) && au_userns && !is_current_mnt_ns(mnt))
		return -EACCES;

	return 0;
}
#endif
/*
 * Call __sync_filesystem() for @h_sb with its s_umount held for read.
 * lockdep is turned off meanwhile.
 */
int vfsub_sync_filesystem(struct super_block *h_sb, int wait)
{
	int ret;

	lockdep_off();
	down_read(&h_sb->s_umount);
	ret = __sync_filesystem(h_sb, wait);
	up_read(&h_sb->s_umount);
	lockdep_on();

	return ret;
}
/* ---------------------------------------------------------------------- */
/*
 * Refresh the inode attributes of @h_path by vfsub_getattr(), but only when
 * the filesystem is not remote and au_test_fs_refresh_iattr() is true for it.
 * When @did is not NULL, whether the refresh was attempted (1 or 0) is stored
 * there.  Returns the result of vfsub_getattr(), or 0 when nothing was done.
 */
int vfsub_update_h_iattr(struct path *h_path, int *did)
{
	int refresh;
	struct kstat st;
	struct super_block *h_sb;

	/* for remote fs, leave work for its getattr or d_revalidate */
	/* for bad i_attr fs, handle them in aufs_getattr() */
	/* still some fs may acquire i_mutex. we need to skip them */
	h_sb = h_path->dentry->d_sb;
	refresh = (!au_test_fs_remote(h_sb) && au_test_fs_refresh_iattr(h_sb));
	if (did)
		*did = refresh;
	if (!refresh)
		return 0;

	return vfsub_getattr(h_path, &st);
}
/* ---------------------------------------------------------------------- */
/*
 * Open @path by dentry_open() with the credentials of the current task.
 * @flags is passed as it is, __FMODE_NONOTIFY is not added.
 */
struct file *vfsub_dentry_open(struct path *path, int flags)
{
return dentry_open(path, flags /* | __FMODE_NONOTIFY */,
current_cred());
}
/*
 * filp_open() with lockdep turned off.  When it succeeds, the attributes of
 * the opened file are refreshed and the result of the refresh is ignored.
 */
struct file *vfsub_filp_open(const char *path, int oflags, int mode)
{
	struct file *h_file;

	lockdep_off();
	h_file = filp_open(path, oflags /* | __FMODE_NONOTIFY */, mode);
	lockdep_on();
	if (!IS_ERR(h_file))
		vfsub_update_h_iattr(&h_file->f_path, /*did*/NULL); /*ignore*/

	return h_file;
}
/*
* Ideally this function should call VFS:do_last() in order to keep all its
* checkings. But it is very hard for aufs to regenerate several VFS internal
* structure such as nameidata. This is a second (or third) best approach.
* cf. linux/fs/namei.c:do_last(), lookup_open() and atomic_open().
*
* Calls ->atomic_open() of @dir for @dentry with args->file, after the
* open flags are tested against the branch args->br.
* br_nfiles of the branch is incremented before the call and decremented
* again on every path on which the file does not stay opened without error.
* Returns the value of ->atomic_open(), or a negative value from the flag
* test or from deny_write_access().
*/
int vfsub_atomic_open(struct inode *dir, struct dentry *dentry,
struct vfsub_aopen_args *args)
{
int err;
struct au_branch *br = args->br;
struct file *file = args->file;
/* copied from linux/fs/namei.c:atomic_open() */
struct dentry *const DENTRY_NOT_SET = (void *)-1UL;
IMustLock(dir);
AuDebugOn(!dir->i_op->atomic_open);
err = au_br_test_oflag(args->open_flag, br);
if (unlikely(err))
goto out;
au_lcnt_inc(&br->br_nfiles);
file->f_path.dentry = DENTRY_NOT_SET;
file->f_path.mnt = au_br_mnt(br);
AuDbg("%ps\n", dir->i_op->atomic_open);
err = dir->i_op->atomic_open(dir, dentry, file, args->open_flag,
args->create_mode);
if (unlikely(err < 0)) {
au_lcnt_dec(&br->br_nfiles);
goto out;
}
/* temporary workaround for nfsv4 branch */
if (au_test_nfs(dir->i_sb))
nfs_mark_for_revalidate(dir);
if (file->f_mode & FMODE_CREATED)
fsnotify_create(dir, dentry);
/* not opened, so the file is not counted on the branch */
if (!(file->f_mode & FMODE_OPENED)) {
au_lcnt_dec(&br->br_nfiles);
goto out;
}
/* todo: call VFS:may_open() here */
/* todo: ima_file_check() too? */
if (!err && (args->open_flag & __FMODE_EXEC))
err = deny_write_access(file);
if (!err)
fsnotify_open(file);
else
au_lcnt_dec(&br->br_nfiles);
/* note that the file is created and still opened */
out:
return err;
}
/*
 * kern_path(), and refresh the attributes when the found dentry is positive.
 * The result of the refresh is ignored.
 */
int vfsub_kern_path(const char *name, unsigned int flags, struct path *path)
{
	const int err = kern_path(name, flags, path);

	if (err)
		return err;
	if (d_is_positive(path->dentry))
		vfsub_update_h_iattr(path, /*did*/NULL); /*ignore*/

	return 0;
}
/*
 * lookup_one_len_unlocked(), and refresh the attributes when the found
 * dentry is positive.  The result of the refresh is ignored.
 */
struct dentry *vfsub_lookup_one_len_unlocked(const char *name,
					     struct dentry *parent, int len)
{
	struct path path = {
		.mnt = NULL
	};

	path.dentry = lookup_one_len_unlocked(name, parent, len);
	if (!IS_ERR(path.dentry) && d_is_positive(path.dentry))
		vfsub_update_h_iattr(&path, /*did*/NULL); /*ignore*/

	AuTraceErrPtr(path.dentry);
	return path.dentry;
}
/*
 * lookup_one_len(), and refresh the attributes when the found dentry is
 * positive.  The result of the refresh is ignored.
 */
struct dentry *vfsub_lookup_one_len(const char *name, struct dentry *parent,
				    int len)
{
	struct path path = {
		.mnt = NULL
	};

	/* VFS checks it too, but by WARN_ON_ONCE() */
	IMustLock(d_inode(parent));

	path.dentry = lookup_one_len(name, parent, len);
	if (!IS_ERR(path.dentry) && d_is_positive(path.dentry))
		vfsub_update_h_iattr(&path, /*did*/NULL); /*ignore*/

	AuTraceErrPtr(path.dentry);
	return path.dentry;
}
/*
 * A wrapper of vfsub_lkup_one() which takes its arguments packed in
 * struct vfsub_lkup_one_args and stores the result through a->errp.
 */
void vfsub_call_lkup_one(void *args)
{
struct vfsub_lkup_one_args *a = args;
*a->errp = vfsub_lkup_one(a->name, a->parent);
}
/* ---------------------------------------------------------------------- */
/*
 * lock_rename() with lockdep turned off, then suspend the notification of
 * both hinodes (only once when they are the same one).
 * Returns what lock_rename() returned.
 */
struct dentry *vfsub_lock_rename(struct dentry *d1, struct au_hinode *hdir1,
				 struct dentry *d2, struct au_hinode *hdir2)
{
	struct dentry *ret;

	lockdep_off();
	ret = lock_rename(d1, d2);
	lockdep_on();

	au_hn_suspend(hdir1);
	if (hdir2 != hdir1)
		au_hn_suspend(hdir2);

	return ret;
}
/*
 * The reverse of vfsub_lock_rename(): resume the notification of both
 * hinodes (only once when they are the same one), then unlock_rename() with
 * lockdep turned off.
 */
void vfsub_unlock_rename(struct dentry *d1, struct au_hinode *hdir1,
			 struct dentry *d2, struct au_hinode *hdir2)
{
	au_hn_resume(hdir1);
	if (hdir2 != hdir1)
		au_hn_resume(hdir2);

	lockdep_off();
	unlock_rename(d1, d2);
	lockdep_on();
}
/* ---------------------------------------------------------------------- */
/*
 * Create a regular file at @path under @dir.
 * security_path_mknod() is consulted first with @path temporarily pointing
 * to the parent.  After a successful vfs_create() the attributes of the new
 * entry, and of its parent if the first refresh was attempted, are refreshed;
 * the results of the refresh are ignored.
 */
int vfsub_create(struct inode *dir, struct path *path, int mode, bool want_excl)
{
	int err, did;
	struct dentry *child;
	struct path tmp;

	IMustLock(dir);

	child = path->dentry;
	path->dentry = child->d_parent;
	err = security_path_mknod(path, child, mode, 0);
	path->dentry = child;
	if (unlikely(err))
		return err;

	lockdep_off();
	err = vfs_create(dir, path->dentry, mode, want_excl);
	lockdep_on();
	if (err)
		return err;

	tmp = *path;
	vfsub_update_h_iattr(&tmp, &did);
	if (did) {
		tmp.dentry = path->dentry->d_parent;
		vfsub_update_h_iattr(&tmp, /*did*/NULL);
	}
	/*ignore*/

	return 0;
}
/*
 * Create a symlink to @symname at @path under @dir.
 * security_path_symlink() is consulted first with @path temporarily pointing
 * to the parent.  After a successful vfs_symlink() the attributes of the new
 * entry, and of its parent if the first refresh was attempted, are refreshed;
 * the results of the refresh are ignored.
 */
int vfsub_symlink(struct inode *dir, struct path *path, const char *symname)
{
	int err, did;
	struct dentry *child;
	struct path tmp;

	IMustLock(dir);

	child = path->dentry;
	path->dentry = child->d_parent;
	err = security_path_symlink(path, child, symname);
	path->dentry = child;
	if (unlikely(err))
		return err;

	lockdep_off();
	err = vfs_symlink(dir, path->dentry, symname);
	lockdep_on();
	if (err)
		return err;

	tmp = *path;
	vfsub_update_h_iattr(&tmp, &did);
	if (did) {
		tmp.dentry = path->dentry->d_parent;
		vfsub_update_h_iattr(&tmp, /*did*/NULL);
	}
	/*ignore*/

	return 0;
}
/*
 * Create a node of @mode and @dev at @path under @dir.
 * security_path_mknod() is consulted first with @path temporarily pointing
 * to the parent.  After a successful vfs_mknod() the attributes of the new
 * entry, and of its parent if the first refresh was attempted, are refreshed;
 * the results of the refresh are ignored.
 */
int vfsub_mknod(struct inode *dir, struct path *path, int mode, dev_t dev)
{
	int err, did;
	struct dentry *child;
	struct path tmp;

	IMustLock(dir);

	child = path->dentry;
	path->dentry = child->d_parent;
	err = security_path_mknod(path, child, mode, new_encode_dev(dev));
	path->dentry = child;
	if (unlikely(err))
		return err;

	lockdep_off();
	err = vfs_mknod(dir, path->dentry, mode, dev);
	lockdep_on();
	if (err)
		return err;

	tmp = *path;
	vfsub_update_h_iattr(&tmp, &did);
	if (did) {
		tmp.dentry = path->dentry->d_parent;
		vfsub_update_h_iattr(&tmp, /*did*/NULL);
	}
	/*ignore*/

	return 0;
}
/*
 * returns -EMLINK when the filesystem of @inode is one for which
 * au_test_fs_no_limit_nlink() is true and i_nlink has reached UINT_MAX / 2,
 * otherwise 0.
 */
static int au_test_nlink(struct inode *inode)
{
	const unsigned int link_max = UINT_MAX >> 1; /* rough margin */

	if (au_test_fs_no_limit_nlink(inode->i_sb)
	    && inode->i_nlink >= link_max)
		return -EMLINK;

	return 0;
}
/*
 * Make a hard link to @src_dentry at @path under @dir.
 * The link count is tested by au_test_nlink() and security_path_link() is
 * consulted with @path temporarily pointing to the parent.  After a
 * successful vfs_link() the attributes of the new entry, and of its parent
 * and the source if the first refresh was attempted, are refreshed; the
 * results of the refresh are ignored.
 */
int vfsub_link(struct dentry *src_dentry, struct inode *dir, struct path *path,
	       struct inode **delegated_inode)
{
	int err, did;
	struct dentry *child;
	struct path tmp;

	IMustLock(dir);

	err = au_test_nlink(d_inode(src_dentry));
	if (unlikely(err))
		return err;

	/* we don't call may_linkat() */
	child = path->dentry;
	path->dentry = child->d_parent;
	err = security_path_link(src_dentry, path, child);
	path->dentry = child;
	if (unlikely(err))
		return err;

	lockdep_off();
	err = vfs_link(src_dentry, dir, path->dentry, delegated_inode);
	lockdep_on();
	if (err)
		return err;

	tmp = *path;
	/* fuse has different memory inode for the same inumber */
	vfsub_update_h_iattr(&tmp, &did);
	if (did) {
		tmp.dentry = path->dentry->d_parent;
		vfsub_update_h_iattr(&tmp, /*did*/NULL);
		tmp.dentry = src_dentry;
		vfsub_update_h_iattr(&tmp, /*did*/NULL);
	}
	/*ignore*/

	return 0;
}
/*
 * Rename @src_dentry under @src_dir to @path under @dir.
 * security_path_rename() is consulted first (with zero as its flags) while
 * @path temporarily points to the parent of the target.  After a successful
 * vfs_rename() the attributes of the parent of the target, and of the source
 * and its parent if the first refresh was attempted, are refreshed; the
 * results of the refresh are ignored.
 */
int vfsub_rename(struct inode *src_dir, struct dentry *src_dentry,
		 struct inode *dir, struct path *path,
		 struct inode **delegated_inode, unsigned int flags)
{
	int err, did;
	struct dentry *child;
	struct path tmp = {
		.mnt = path->mnt
	};

	IMustLock(dir);
	IMustLock(src_dir);

	child = path->dentry;
	path->dentry = child->d_parent;
	tmp.dentry = src_dentry->d_parent;
	err = security_path_rename(&tmp, src_dentry, path, child, /*flags*/0);
	path->dentry = child;
	if (unlikely(err))
		return err;

	lockdep_off();
	err = vfs_rename(src_dir, src_dentry, dir, path->dentry,
			 delegated_inode, flags);
	lockdep_on();
	if (err)
		return err;

	tmp.dentry = child->d_parent;
	vfsub_update_h_iattr(&tmp, &did);
	if (did) {
		tmp.dentry = src_dentry;
		vfsub_update_h_iattr(&tmp, /*did*/NULL);
		tmp.dentry = src_dentry->d_parent;
		vfsub_update_h_iattr(&tmp, /*did*/NULL);
	}
	/*ignore*/

	return 0;
}
/*
 * Create a directory at @path under @dir.
 * security_path_mkdir() is consulted first with @path temporarily pointing
 * to the parent.  After a successful vfs_mkdir() the attributes of the new
 * entry, and of its parent if the first refresh was attempted, are refreshed;
 * the results of the refresh are ignored.
 */
int vfsub_mkdir(struct inode *dir, struct path *path, int mode)
{
	int err, did;
	struct dentry *child;
	struct path tmp;

	IMustLock(dir);

	child = path->dentry;
	path->dentry = child->d_parent;
	err = security_path_mkdir(path, child, mode);
	path->dentry = child;
	if (unlikely(err))
		return err;

	lockdep_off();
	err = vfs_mkdir(dir, path->dentry, mode);
	lockdep_on();
	if (err)
		return err;

	tmp = *path;
	vfsub_update_h_iattr(&tmp, &did);
	if (did) {
		tmp.dentry = path->dentry->d_parent;
		vfsub_update_h_iattr(&tmp, /*did*/NULL);
	}
	/*ignore*/

	return 0;
}
int vfsub_rmdir(struct inode *dir, struct path *path)
{
int err;
struct dentry *d;
IMustLock(dir);
d = path->dentry;
path->dentry = d->d_parent;
err = security_path_rmdir(path, d);
path->dentry = d;
if (unlikely(err))
goto out;
lockdep_off();
err = vfs_rmdir(dir, path->dentry);
lockdep_on();
if (!err) {
struct path tmp = {
.dentry = path->dentry->d_parent,
.mnt = path->mnt
};
vfsub_update_h_iattr(&tmp, /*did*/NULL); /*ignore*/
}
out:
return err;
}
/* ---------------------------------------------------------------------- */
/* todo: support mmap_sem? */
/*
 * vfs_read() into a user buffer with lockdep turned off.  The attributes of
 * the file are refreshed unless the read failed.
 */
ssize_t vfsub_read_u(struct file *file, char __user *ubuf, size_t count,
		     loff_t *ppos)
{
	ssize_t nread;

	lockdep_off();
	nread = vfs_read(file, ubuf, count, ppos);
	lockdep_on();
	if (nread < 0)
		return nread;

	vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
	return nread;
}
/* todo: kernel_read()? */
/*
 * Read into a kernel buffer: the address limit is switched to KERNEL_DS
 * around vfsub_read_u() and restored afterwards.
 */
ssize_t vfsub_read_k(struct file *file, void *kbuf, size_t count,
		     loff_t *ppos)
{
	ssize_t nread;
	mm_segment_t saved_fs;
	union {
		void *k;
		char __user *u;
	} buf = {
		.k = kbuf
	};

	saved_fs = get_fs();
	set_fs(KERNEL_DS);
	nread = vfsub_read_u(file, buf.u, count, ppos);
	set_fs(saved_fs);

	return nread;
}
/*
 * vfs_write() from a user buffer with lockdep turned off.  The attributes of
 * the file are refreshed unless the write failed.
 */
ssize_t vfsub_write_u(struct file *file, const char __user *ubuf, size_t count,
		      loff_t *ppos)
{
	ssize_t nwritten;

	lockdep_off();
	nwritten = vfs_write(file, ubuf, count, ppos);
	lockdep_on();
	if (nwritten < 0)
		return nwritten;

	vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
	return nwritten;
}
/*
 * Write from a kernel buffer: the address limit is switched to KERNEL_DS
 * around vfsub_write_u() and restored afterwards.
 */
ssize_t vfsub_write_k(struct file *file, void *kbuf, size_t count, loff_t *ppos)
{
	ssize_t nwritten;
	mm_segment_t saved_fs;
	union {
		void *k;
		const char __user *u;
	} buf = {
		.k = kbuf
	};

	saved_fs = get_fs();
	set_fs(KERNEL_DS);
	nwritten = vfsub_write_u(file, buf.u, count, ppos);
	set_fs(saved_fs);

	return nwritten;
}
/*
 * Call ->flush() of @file if it has one.  lockdep is turned off around the
 * call only when the file is on nfs.  The attributes are refreshed after a
 * successful flush.  Returns 0 when there is no ->flush().
 */
int vfsub_flush(struct file *file, fl_owner_t id)
{
	int err, nfs;

	if (!file->f_op->flush)
		return 0;

	nfs = !!au_test_nfs(file->f_path.dentry->d_sb);
	if (nfs)
		lockdep_off();
	err = file->f_op->flush(file, id);
	if (nfs)
		lockdep_on();
	if (!err)
		vfsub_update_h_iattr(&file->f_path, /*did*/NULL);
	/*ignore*/

	return err;
}
/*
 * iterate_dir() with lockdep turned off.  The attributes of the directory
 * are refreshed unless it failed.
 */
int vfsub_iterate_dir(struct file *file, struct dir_context *ctx)
{
	int ret;

	AuDbg("%pD, ctx{%ps, %llu}\n", file, ctx->actor, ctx->pos);

	lockdep_off();
	ret = iterate_dir(file, ctx);
	lockdep_on();
	if (ret < 0)
		return ret;

	vfsub_update_h_iattr(&file->f_path, /*did*/NULL); /*ignore*/
	return ret;
}
/*
 * Splice from @in into @pipe via do_splice_to() while lockdep is disabled.
 * file_accessed() is called on @in regardless of the result; only a
 * non-negative result additionally triggers vfsub_update_h_iattr() on the
 * file's path (result ignored).
 * Returns the value of do_splice_to().
 */
long vfsub_splice_to(struct file *in, loff_t *ppos,
		     struct pipe_inode_info *pipe, size_t len,
		     unsigned int flags)
{
	long ret;

	lockdep_off();
	ret = do_splice_to(in, ppos, pipe, len, flags);
	lockdep_on();

	file_accessed(in);
	if (ret < 0)
		return ret;

	vfsub_update_h_iattr(&in->f_path, /*did*/NULL); /*ignore*/
	return ret;
}
/*
 * Splice from @pipe into @out via do_splice_from() while lockdep is
 * disabled.  A non-negative result triggers vfsub_update_h_iattr() on the
 * output file's path (result ignored).
 * Returns the value of do_splice_from().
 */
long vfsub_splice_from(struct pipe_inode_info *pipe, struct file *out,
		       loff_t *ppos, size_t len, unsigned int flags)
{
	long ret;

	lockdep_off();
	ret = do_splice_from(pipe, out, ppos, len, flags);
	lockdep_on();

	if (ret < 0)
		return ret;

	vfsub_update_h_iattr(&out->f_path, /*did*/NULL); /*ignore*/
	return ret;
}
/*
 * Call vfs_fsync() on @file while lockdep is disabled.  On success,
 * vfsub_update_h_iattr() is invoked on @path, or on the file's own path when
 * @path is NULL (in which case @file is expected to be non-NULL).
 * Returns the value of vfs_fsync().
 */
int vfsub_fsync(struct file *file, struct path *path, int datasync)
{
	int err;

	/* file can be NULL */
	lockdep_off();
	err = vfs_fsync(file, datasync);
	lockdep_on();
	if (err)
		return err;

	if (!path) {
		AuDebugOn(!file);
		path = &file->f_path;
	}
	vfsub_update_h_iattr(path, /*did*/NULL); /*ignore*/

	return err;
}
/*
 * Truncate the object at @h_path to @length.
 * cf. open.c:do_sys_truncate() and do_sys_ftruncate()
 *
 * Without @h_file the work is delegated to vfsub_truncate().  With @h_file,
 * the sequence is: sb_start_write(), locks_verify_truncate(),
 * security_path_truncate(), do_truncate(), sb_end_write().  The first failing
 * step skips the following ones, but sb_end_write() always runs.  lockdep is
 * disabled around the superblock write markers and around do_truncate().
 * Returns 0 or the error of the first failing step.
 */
int vfsub_trunc(struct path *h_path, loff_t length, unsigned int attr,
		struct file *h_file)
{
	int err;
	struct inode *h_inode;
	struct super_block *h_sb;

	if (!h_file)
		return vfsub_truncate(h_path, length);

	h_inode = d_inode(h_path->dentry);
	h_sb = h_inode->i_sb;

	lockdep_off();
	sb_start_write(h_sb);
	lockdep_on();

	err = locks_verify_truncate(h_inode, h_file, length);
	if (err)
		goto end_write;
	err = security_path_truncate(h_path);
	if (err)
		goto end_write;

	lockdep_off();
	err = do_truncate(h_path->dentry, length, attr, h_file);
	lockdep_on();

end_write:
	lockdep_off();
	sb_end_write(h_sb);
	lockdep_on();
	return err;
}
/* ---------------------------------------------------------------------- */
/* argument bundle passed to au_call_vfsub_mkdir() through a void pointer */
struct au_vfsub_mkdir_args {
/* where the result of vfsub_mkdir() is stored */
int *errp;
/* the three members below are forwarded unchanged to vfsub_mkdir() */
struct inode *dir;
struct path *path;
int mode;
};
/*
 * Callback given to au_wkq_wait(): unpack @args (struct au_vfsub_mkdir_args)
 * and store the result of vfsub_mkdir() through ->errp.
 */
static void au_call_vfsub_mkdir(void *args)
{
	struct au_vfsub_mkdir_args *mk = args;
	int ret;

	ret = vfsub_mkdir(mk->dir, mk->path, mk->mode);
	*mk->errp = ret;
}
/*
 * mkdir which may be delegated through au_wkq_wait().
 * When au_test_h_perm_sio(@dir, MAY_EXEC | MAY_WRITE) is zero, vfsub_mkdir()
 * is called directly with lockdep disabled.  Otherwise the call is handed to
 * au_wkq_wait() via au_call_vfsub_mkdir(); a failure of au_wkq_wait() itself
 * takes precedence over the mkdir result.
 */
int vfsub_sio_mkdir(struct inode *dir, struct path *path, int mode)
{
	int err, wkq_err;
	struct au_vfsub_mkdir_args args;

	if (!au_test_h_perm_sio(dir, MAY_EXEC | MAY_WRITE)) {
		lockdep_off();
		err = vfsub_mkdir(dir, path, mode);
		lockdep_on();
		return err;
	}

	args.errp = &err;
	args.dir = dir;
	args.path = path;
	args.mode = mode;
	wkq_err = au_wkq_wait(au_call_vfsub_mkdir, &args);
	if (unlikely(wkq_err))
		err = wkq_err;

	return err;
}
/* argument bundle passed to au_call_vfsub_rmdir() through a void pointer */
struct au_vfsub_rmdir_args {
/* where the result of vfsub_rmdir() is stored */
int *errp;
/* the two members below are forwarded unchanged to vfsub_rmdir() */
struct inode *dir;
struct path *path;
};
/*
 * Callback given to au_wkq_wait(): unpack @args (struct au_vfsub_rmdir_args)
 * and store the result of vfsub_rmdir() through ->errp.
 */
static void au_call_vfsub_rmdir(void *args)
{
	struct au_vfsub_rmdir_args *rm = args;
	int ret;

	ret = vfsub_rmdir(rm->dir, rm->path);
	*rm->errp = ret;
}
/*
 * rmdir which may be delegated through au_wkq_wait().
 * When au_test_h_perm_sio(@dir, MAY_EXEC | MAY_WRITE) is zero, vfsub_rmdir()
 * is called directly with lockdep disabled.  Otherwise the call is handed to
 * au_wkq_wait() via au_call_vfsub_rmdir(); a failure of au_wkq_wait() itself
 * takes precedence over the rmdir result.
 */
int vfsub_sio_rmdir(struct inode *dir, struct path *path)
{
	int err, wkq_err;
	struct au_vfsub_rmdir_args args;

	if (!au_test_h_perm_sio(dir, MAY_EXEC | MAY_WRITE)) {
		lockdep_off();
		err = vfsub_rmdir(dir, path);
		lockdep_on();
		return err;
	}

	args.errp = &err;
	args.dir = dir;
	args.path = path;
	wkq_err = au_wkq_wait(au_call_vfsub_rmdir, &args);
	if (unlikely(wkq_err))
		err = wkq_err;

	return err;
}
/* ---------------------------------------------------------------------- */
/* argument bundle passed to call_notify_change() through a void pointer */
struct notify_change_args {
/* receives -EPERM or the result of notify_change() */
int *errp;
/* ->dentry is the object whose attributes are changed */
struct path *path;
/* forwarded unchanged to notify_change() */
struct iattr *ia;
struct inode **delegated_inode;
};
/*
 * Worker shared by vfsub_notify_change() and vfsub_sio_notify_change().
 * The inode behind a->path->dentry is expected to be locked.  Immutable and
 * append-only inodes are refused with -EPERM; otherwise notify_change() runs
 * with lockdep disabled and, on success, vfsub_update_h_iattr() is invoked on
 * the path (result ignored).  The outcome is stored through a->errp.
 */
static void call_notify_change(void *args)
{
	struct notify_change_args *a = args;
	struct inode *h_inode = d_inode(a->path->dentry);

	IMustLock(h_inode);

	*a->errp = -EPERM;
	if (IS_IMMUTABLE(h_inode) || IS_APPEND(h_inode))
		goto out;

	lockdep_off();
	*a->errp = notify_change(a->path->dentry, a->ia,
				 a->delegated_inode);
	lockdep_on();
	if (!*a->errp)
		vfsub_update_h_iattr(a->path, /*did*/NULL); /*ignore*/

out:
	AuTraceErr(*a->errp);
}
/*
 * Change the attributes of @path->dentry in the calling context by running
 * call_notify_change() directly.
 * Returns the value call_notify_change() stored (-EPERM for immutable or
 * append-only inodes, otherwise the result of notify_change()).
 */
int vfsub_notify_change(struct path *path, struct iattr *ia,
			struct inode **delegated_inode)
{
	int err;
	struct notify_change_args args;

	args.errp = &err;
	args.path = path;
	args.ia = ia;
	args.delegated_inode = delegated_inode;
	call_notify_change(&args);

	return err;
}
/*
 * Same as vfsub_notify_change(), but call_notify_change() is handed to
 * au_wkq_wait() instead of being called directly.  A failure of au_wkq_wait()
 * itself takes precedence over the value stored by the callback.
 */
int vfsub_sio_notify_change(struct path *path, struct iattr *ia,
			    struct inode **delegated_inode)
{
	int err, wkq_err;
	struct notify_change_args args;

	args.errp = &err;
	args.path = path;
	args.ia = ia;
	args.delegated_inode = delegated_inode;

	wkq_err = au_wkq_wait(call_notify_change, &args);
	if (unlikely(wkq_err))
		err = wkq_err;

	return err;
}
/* ---------------------------------------------------------------------- */
/* argument bundle passed to call_unlink() through a void pointer */
struct unlink_args {
/* receives the result of security_path_unlink() or vfs_unlink() */
int *errp;
/* parent directory inode; call_unlink() expects it to be locked */
struct inode *dir;
/* ->dentry is the entry to unlink */
struct path *path;
/* forwarded unchanged to vfs_unlink() */
struct inode **delegated_inode;
};
/*
 * Worker behind vfsub_unlink(); @args is a struct unlink_args.
 * Runs security_path_unlink() and then vfs_unlink() on a->path->dentry, and
 * stores the outcome through a->errp.  On success, vfsub_update_h_iattr() is
 * invoked on the parent directory (result ignored).
 */
static void call_unlink(void *args)
{
struct unlink_args *a = args;
struct dentry *d = a->path->dentry;
struct inode *h_inode;
/*
 * NOTE(review): judging by the name, the extra dentry reference below is
 * presumably skipped on NFS, when au_dcount(d) is 1, to avoid a silly-rename
 * -- confirm.
 */
const int stop_sillyrename = (au_test_nfs(d->d_sb)
&& au_dcount(d) == 1);
IMustLock(a->dir);
/* the security hook takes the parent path, so swap the dentry temporarily */
a->path->dentry = d->d_parent;
*a->errp = security_path_unlink(a->path, d);
a->path->dentry = d;
if (unlikely(*a->errp))
return;
/* keep the dentry and (if positive) its inode referenced across the unlink */
if (!stop_sillyrename)
dget(d);
h_inode = NULL;
if (d_is_positive(d)) {
h_inode = d_inode(d);
ihold(h_inode);
}
lockdep_off();
*a->errp = vfs_unlink(a->dir, d, a->delegated_inode);
lockdep_on();
if (!*a->errp) {
/* refresh based on the parent directory of the removed entry */
struct path tmp = {
.dentry = d->d_parent,
.mnt = a->path->mnt
};
vfsub_update_h_iattr(&tmp, /*did*/NULL); /*ignore*/
}
/* drop the references taken above */
if (!stop_sillyrename)
dput(d);
if (h_inode)
iput(h_inode);
AuTraceErr(*a->errp);
}
/*
 * Unlink the entry at @path->dentry.
 * @dir: parent directory inode, must be locked.
 * @path: its ->dentry is the target.
 * @delegated_inode: passed through to vfs_unlink().
 * @force: when zero, call_unlink() runs in the calling context; otherwise it
 *	is handed to au_wkq_wait(), whose own failure takes precedence over
 *	the unlink result.
 */
int vfsub_unlink(struct inode *dir, struct path *path,
		 struct inode **delegated_inode, int force)
{
	int err, wkq_err;
	struct unlink_args args;

	args.errp = &err;
	args.dir = dir;
	args.path = path;
	args.delegated_inode = delegated_inode;

	if (!force) {
		call_unlink(&args);
		return err;
	}

	wkq_err = au_wkq_wait(call_unlink, &args);
	if (unlikely(wkq_err))
		err = wkq_err;

	return err;
}

354
fs/aufs/vfsub.h Normal file
View File

@ -0,0 +1,354 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2005-2020 Junjiro R. Okajima
*
* This program, aufs is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* sub-routines for VFS
*/
#ifndef __AUFS_VFSUB_H__
#define __AUFS_VFSUB_H__
#ifdef __KERNEL__
#include <linux/fs.h>
#include <linux/mount.h>
#include <linux/posix_acl.h>
#include <linux/xattr.h>
#include "debug.h"
/* copied from linux/fs/internal.h */
/* todo: BAD approach!! */
extern void __mnt_drop_write(struct vfsmount *);
extern struct file *alloc_empty_file(int, const struct cred *);
/* ---------------------------------------------------------------------- */
/* lock subclass for lower inode */
/* default MAX_LOCKDEP_SUBCLASSES(8) is not enough */
/* reduce? gave up. */
/*
 * Lock subclass values for lower inodes.  The numbering starts at
 * I_MUTEX_PARENT2; AuLsc_I_Begin and AuLsc_I_End delimit the range.
 */
enum {
AuLsc_I_Begin = I_MUTEX_PARENT2, /* 5 */
AuLsc_I_PARENT, /* lower inode, parent first */
AuLsc_I_PARENT2, /* copyup dirs */
AuLsc_I_PARENT3, /* copyup wh */
AuLsc_I_CHILD,
AuLsc_I_CHILD2,
AuLsc_I_End
};
/* to debug easier, do not make them inlined functions */
/* debug assertion: the mutex @mtx is expected to be locked */
#define MtxMustLock(mtx) AuDebugOn(!mutex_is_locked(mtx))
/* debug assertion: the inode @i is expected to be locked */
#define IMustLock(i) AuDebugOn(!inode_is_locked(i))
/* ---------------------------------------------------------------------- */
/* drop_nlink() with a debug assertion that the link count is not already 0 */
static inline void vfsub_drop_nlink(struct inode *inode)
{
	AuDebugOn(inode->i_nlink == 0);
	drop_nlink(inode);
}
/*
 * Flag the directory @inode as S_DEAD and clear its link count.
 * A debug assertion checks that @inode really is a directory.
 */
static inline void vfsub_dead_dir(struct inode *inode)
{
	AuDebugOn(!S_ISDIR(inode->i_mode));
	inode->i_flags = inode->i_flags | S_DEAD;
	clear_nlink(inode);
}
/*
 * Return 1 when @inode cannot be written natively: its superblock is
 * read-only, IS_RDONLY() holds, or the inode is immutable.  Return 0
 * otherwise.  Append-only inodes are deliberately not tested here.
 */
static inline int vfsub_native_ro(struct inode *inode)
{
	if (sb_rdonly(inode->i_sb))
		return 1;
	if (IS_RDONLY(inode))
		return 1;
	/* IS_APPEND(inode) is intentionally not checked */
	return !!IS_IMMUTABLE(inode);
}
/*
 * vfsub_test_mntns() has a real implementation only with
 * CONFIG_AUFS_BR_FUSE.
 * NOTE(review): AuStubInt0 is defined elsewhere; presumably it emits an
 * inline stub returning 0 -- confirm.
 */
#ifdef CONFIG_AUFS_BR_FUSE
int vfsub_test_mntns(struct vfsmount *mnt, struct super_block *h_sb);
#else
AuStubInt0(vfsub_test_mntns, struct vfsmount *mnt, struct super_block *h_sb);
#endif
int vfsub_sync_filesystem(struct super_block *h_sb, int wait);
/* ---------------------------------------------------------------------- */
int vfsub_update_h_iattr(struct path *h_path, int *did);
struct file *vfsub_dentry_open(struct path *path, int flags);
struct file *vfsub_filp_open(const char *path, int oflags, int mode);
struct au_branch;
/*
 * Extra arguments for vfsub_atomic_open().
 * NOTE(review): the member meanings below are inferred from their names and
 * types only -- confirm against vfsub_atomic_open().
 */
struct vfsub_aopen_args {
struct file *file;
/* presumably the open(2) flags of the request */
unsigned int open_flag;
/* presumably the mode used when the file is created */
umode_t create_mode;
struct au_branch *br;
};
int vfsub_atomic_open(struct inode *dir, struct dentry *dentry,
struct vfsub_aopen_args *args);
int vfsub_kern_path(const char *name, unsigned int flags, struct path *path);
struct dentry *vfsub_lookup_one_len_unlocked(const char *name,
struct dentry *parent, int len);
struct dentry *vfsub_lookup_one_len(const char *name, struct dentry *parent,
int len);
/*
 * Argument bundle for a lookup of one name under a parent dentry.
 * NOTE(review): presumably consumed by vfsub_call_lkup_one(), which takes a
 * void pointer, with the resulting dentry (or error pointer) stored through
 * ->errp -- confirm.
 */
struct vfsub_lkup_one_args {
struct dentry **errp;
struct qstr *name;
struct dentry *parent;
};
/*
 * Look up @name under @parent: a qstr front end for vfsub_lookup_one_len(),
 * whose result is returned unchanged.
 */
static inline struct dentry *vfsub_lkup_one(struct qstr *name,
					    struct dentry *parent)
{
	struct dentry *found;

	found = vfsub_lookup_one_len(name->name, parent, name->len);
	return found;
}
void vfsub_call_lkup_one(void *args);
/* ---------------------------------------------------------------------- */
/* mnt_want_write() called with lockdep disabled; returns its result */
static inline int vfsub_mnt_want_write(struct vfsmount *mnt)
{
	int ret;

	lockdep_off();
	ret = mnt_want_write(mnt);
	lockdep_on();

	return ret;
}
/* mnt_drop_write() called with lockdep disabled */
static inline void vfsub_mnt_drop_write(struct vfsmount *mnt)
{
	lockdep_off();
	mnt_drop_write(mnt);
	lockdep_on();
}
#if 0 /* reserved */
/* compiled out: mnt_drop_write_file() called with lockdep disabled */
static inline void vfsub_mnt_drop_write_file(struct file *file)
{
lockdep_off();
mnt_drop_write_file(file);
lockdep_on();
}
#endif
/* ---------------------------------------------------------------------- */
struct au_hinode;
struct dentry *vfsub_lock_rename(struct dentry *d1, struct au_hinode *hdir1,
struct dentry *d2, struct au_hinode *hdir2);
void vfsub_unlock_rename(struct dentry *d1, struct au_hinode *hdir1,
struct dentry *d2, struct au_hinode *hdir2);
int vfsub_create(struct inode *dir, struct path *path, int mode,
bool want_excl);
int vfsub_symlink(struct inode *dir, struct path *path,
const char *symname);
int vfsub_mknod(struct inode *dir, struct path *path, int mode, dev_t dev);
int vfsub_link(struct dentry *src_dentry, struct inode *dir,
struct path *path, struct inode **delegated_inode);
int vfsub_rename(struct inode *src_hdir, struct dentry *src_dentry,
struct inode *hdir, struct path *path,
struct inode **delegated_inode, unsigned int flags);
int vfsub_mkdir(struct inode *dir, struct path *path, int mode);
int vfsub_rmdir(struct inode *dir, struct path *path);
/* ---------------------------------------------------------------------- */
ssize_t vfsub_read_u(struct file *file, char __user *ubuf, size_t count,
loff_t *ppos);
ssize_t vfsub_read_k(struct file *file, void *kbuf, size_t count,
loff_t *ppos);
ssize_t vfsub_write_u(struct file *file, const char __user *ubuf, size_t count,
loff_t *ppos);
ssize_t vfsub_write_k(struct file *file, void *kbuf, size_t count,
loff_t *ppos);
int vfsub_flush(struct file *file, fl_owner_t id);
int vfsub_iterate_dir(struct file *file, struct dir_context *ctx);
/* return i_size_read() of the inode behind @file */
static inline loff_t vfsub_f_size_read(struct file *file)
{
	struct inode *inode = file_inode(file);

	return i_size_read(inode);
}
/* return a copy of file->f_flags taken while holding file->f_lock */
static inline unsigned int vfsub_file_flags(struct file *file)
{
	unsigned int snapshot;

	spin_lock(&file->f_lock);
	snapshot = file->f_flags;
	spin_unlock(&file->f_lock);

	return snapshot;
}
/* return 1 when __FMODE_EXEC is set in the flags of @file, else 0 */
static inline int vfsub_file_execed(struct file *file)
{
	/* todo: direct access f_flags */
	return (vfsub_file_flags(file) & __FMODE_EXEC) != 0;
}
#if 0 /* reserved */
/*
 * compiled out: file_accessed() followed by vfsub_update_h_iattr() on the
 * file's path
 */
static inline void vfsub_file_accessed(struct file *h_file)
{
file_accessed(h_file);
vfsub_update_h_iattr(&h_file->f_path, /*did*/NULL); /*ignore*/
}
#endif
#if 0 /* reserved */
/*
 * compiled out: build a path from @h_mnt and @h_dentry, then call
 * touch_atime() and vfsub_update_h_iattr() on it
 */
static inline void vfsub_touch_atime(struct vfsmount *h_mnt,
struct dentry *h_dentry)
{
struct path h_path = {
.dentry = h_dentry,
.mnt = h_mnt
};
touch_atime(&h_path);
vfsub_update_h_iattr(&h_path, /*did*/NULL); /*ignore*/
}
#endif
/* plain front end for update_time(); its result is returned unchanged */
static inline int vfsub_update_time(struct inode *h_inode,
				    struct timespec64 *ts, int flags)
{
	int err;

	/* no vfsub_update_h_iattr() since we don't have struct path */
	err = update_time(h_inode, ts, flags);
	return err;
}
#ifdef CONFIG_FS_POSIX_ACL
/*
 * posix_acl_chmod() on @h_inode where -EOPNOTSUPP is treated as success.
 * Any other result is returned unchanged.
 */
static inline int vfsub_acl_chmod(struct inode *h_inode, umode_t h_mode)
{
	int err = posix_acl_chmod(h_inode, h_mode);

	return err == -EOPNOTSUPP ? 0 : err;
}
#else
AuStubInt0(vfsub_acl_chmod, struct inode *h_inode, umode_t h_mode);
#endif
long vfsub_splice_to(struct file *in, loff_t *ppos,
struct pipe_inode_info *pipe, size_t len,
unsigned int flags);
long vfsub_splice_from(struct pipe_inode_info *pipe, struct file *out,
loff_t *ppos, size_t len, unsigned int flags);
/* vfs_truncate() called with lockdep disabled; returns its result */
static inline long vfsub_truncate(struct path *path, loff_t length)
{
	long ret;

	lockdep_off();
	ret = vfs_truncate(path, length);
	lockdep_on();

	return ret;
}
int vfsub_trunc(struct path *h_path, loff_t length, unsigned int attr,
struct file *h_file);
int vfsub_fsync(struct file *file, struct path *path, int datasync);
/*
 * re-use branch fs's ioctl(FICLONE) while aufs itself doesn't support such
 * ioctl.
 *
 * Clones @len bytes from offset 0 of @src to offset 0 of @dst through
 * vfs_clone_file_range() with no remap flags and lockdep disabled; its
 * result is returned unchanged.
 */
static inline loff_t vfsub_clone_file_range(struct file *src, struct file *dst,
					    loff_t len)
{
	loff_t ret;

	lockdep_off();
	ret = vfs_clone_file_range(src, 0, dst, 0, len, /*remap_flags*/0);
	lockdep_on();

	return ret;
}
/*
 * copy_file_range(2) is a systemcall
 *
 * vfs_copy_file_range() called with lockdep disabled; returns its result.
 */
static inline ssize_t vfsub_copy_file_range(struct file *src, loff_t src_pos,
					    struct file *dst, loff_t dst_pos,
					    size_t len, unsigned int flags)
{
	ssize_t ncopied;

	lockdep_off();
	ncopied = vfs_copy_file_range(src, src_pos, dst, dst_pos, len, flags);
	lockdep_on();

	return ncopied;
}
/* ---------------------------------------------------------------------- */
/* vfs_llseek() called with lockdep disabled; returns its result */
static inline loff_t vfsub_llseek(struct file *file, loff_t offset, int origin)
{
	loff_t pos;

	lockdep_off();
	pos = vfs_llseek(file, offset, origin);
	lockdep_on();

	return pos;
}
/* ---------------------------------------------------------------------- */
int vfsub_sio_mkdir(struct inode *dir, struct path *path, int mode);
int vfsub_sio_rmdir(struct inode *dir, struct path *path);
int vfsub_sio_notify_change(struct path *path, struct iattr *ia,
struct inode **delegated_inode);
int vfsub_notify_change(struct path *path, struct iattr *ia,
struct inode **delegated_inode);
int vfsub_unlink(struct inode *dir, struct path *path,
struct inode **delegated_inode, int force);
/*
 * vfs_getattr() requesting STATX_BASIC_STATS with AT_STATX_SYNC_AS_STAT;
 * its result is returned unchanged.
 */
static inline int vfsub_getattr(const struct path *path, struct kstat *st)
{
	int err;

	err = vfs_getattr(path, st, STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT);
	return err;
}
/* ---------------------------------------------------------------------- */
/* vfs_setxattr() called with lockdep disabled; returns its result */
static inline int vfsub_setxattr(struct dentry *dentry, const char *name,
				 const void *value, size_t size, int flags)
{
	int ret;

	lockdep_off();
	ret = vfs_setxattr(dentry, name, value, size, flags);
	lockdep_on();

	return ret;
}
/* vfs_removexattr() called with lockdep disabled; returns its result */
static inline int vfsub_removexattr(struct dentry *dentry, const char *name)
{
	int ret;

	lockdep_off();
	ret = vfs_removexattr(dentry, name);
	lockdep_on();

	return ret;
}
#endif /* __KERNEL__ */
#endif /* __AUFS_VFSUB_H__ */

830
fs/aufs/wbr_policy.c Normal file
View File

@ -0,0 +1,830 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2005-2020 Junjiro R. Okajima
*
* This program, aufs is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* policies for selecting one among multiple writable branches
*/
#include <linux/statfs.h>
#include "aufs.h"
/*
 * subset of cpup_attr()
 *
 * Copy mode, uid and gid from the inode of @h_src onto @h_path through
 * vfsub_sio_notify_change(), after propagating the inode flags with
 * au_cpup_attr_flags().  When the source mode carries S_ISUID or S_ISGID and
 * the target is on NFS, the mode is applied a second time.
 * Returns 0 or the error of vfsub_sio_notify_change().
 */
static noinline_for_stack
int au_cpdown_attr(struct path *h_path, struct dentry *h_src)
{
	int err, has_sbits;
	struct iattr ia;
	struct inode *h_isrc = d_inode(h_src);

	ia.ia_valid = ATTR_FORCE | ATTR_MODE | ATTR_UID | ATTR_GID;
	ia.ia_mode = h_isrc->i_mode;
	ia.ia_uid = h_isrc->i_uid;
	ia.ia_gid = h_isrc->i_gid;
	has_sbits = (ia.ia_mode & (S_ISUID | S_ISGID)) != 0;

	au_cpup_attr_flags(d_inode(h_path->dentry), h_isrc->i_flags);

	/* no delegation since it is just created */
	err = vfsub_sio_notify_change(h_path, &ia, /*delegated*/NULL);

	/* is this nfs only? */
	if (err || !has_sbits || !au_test_nfs(h_path->dentry->d_sb))
		return err;

	ia.ia_valid = ATTR_FORCE | ATTR_MODE;
	ia.ia_mode = h_isrc->i_mode;
	return vfsub_sio_notify_change(h_path, &ia, /*delegated*/NULL);
}
/*
 * State bits shared by the copy-down helpers through an unsigned int:
 * PARENT_OPQ - set by au_cpdown_dir() when the diropq index is <= bdst
 * WHED       - set by au_cpdown_dir() when au_dbwh(dentry) equals bdst
 * MADE_DIR   - set by au_cpdown_dir() after the directory was created
 * DIROPQ     - set by au_cpdown_dir_opq() after the diropq was created
 */
#define AuCpdown_PARENT_OPQ	(1 << 0)
#define AuCpdown_WHED		(1 << 1)
#define AuCpdown_MADE_DIR	(1 << 2)
#define AuCpdown_DIROPQ		(1 << 3)
/* test, set and clear one bit; @name is the suffix after AuCpdown_ */
#define au_ftest_cpdown(flags, name)	((flags) & AuCpdown_##name)
#define au_fset_cpdown(flags, name) \
	do { (flags) |= AuCpdown_##name; } while (0)
#define au_fclr_cpdown(flags, name) \
	do { (flags) &= ~AuCpdown_##name; } while (0)
/*
 * Create the diropq for @dentry on branch @bdst and record it by setting
 * AuCpdown_DIROPQ in *@flags.
 * Returns 0 on success, or the negative error carried by the pointer that
 * au_diropq_create() returned.
 */
static int au_cpdown_dir_opq(struct dentry *dentry, aufs_bindex_t bdst,
			     unsigned int *flags)
{
	struct dentry *opq_dentry;

	opq_dentry = au_diropq_create(dentry, bdst);
	if (IS_ERR(opq_dentry))
		return PTR_ERR(opq_dentry);

	/* only the side effect is wanted; drop the returned reference */
	dput(opq_dentry);
	au_fset_cpdown(*flags, DIROPQ);

	/*
	 * PTR_ERR() of a valid pointer is not an error code, so success must
	 * be reported as an explicit 0: the caller treats any non-zero value
	 * as a failure.
	 */
	return 0;
}
/*
 * Look up the whiteout for @dentry's name under @h_parent on branch @bdst
 * and, when it exists (positive dentry), remove it with
 * au_wh_unlink_dentry().
 * Returns 0 when there is nothing to remove or the removal succeeded,
 * otherwise the error of the lookup or of the removal.
 */
static int au_cpdown_dir_wh(struct dentry *dentry, struct dentry *h_parent,
			    struct inode *dir, aufs_bindex_t bdst)
{
	int err;
	struct path h_path;
	struct au_branch *br = au_sbr(dentry->d_sb, bdst);

	h_path.dentry = au_wh_lkup(h_parent, &dentry->d_name, br);
	if (IS_ERR(h_path.dentry))
		return PTR_ERR(h_path.dentry);

	err = 0;
	if (d_is_positive(h_path.dentry)) {
		h_path.mnt = au_br_mnt(br);
		err = au_wh_unlink_dentry(au_h_iptr(dir, bdst), &h_path,
					  dentry);
	}
	dput(h_path.dentry);

	return err;
}
/*
 * Callback handed to au_cp_dirs() by au_cpdown_dirs(): create the directory
 * for @dentry on branch @bdst under @h_parent.
 * @arg points to the unsigned int holding the AuCpdown_* state bits.
 * @pin is not used in this function.
 * On failure the steps already performed are reverted; when a revert step
 * itself fails, -EIO is returned.
 * Returns 0 on success or a negative error.
 */
static int au_cpdown_dir(struct dentry *dentry, aufs_bindex_t bdst,
struct au_pin *pin,
struct dentry *h_parent, void *arg)
{
int err, rerr;
aufs_bindex_t bopq, btop;
struct path h_path;
struct dentry *parent;
struct inode *h_dir, *h_inode, *inode, *dir;
unsigned int *flags = arg;
btop = au_dbtop(dentry);
/* dentry is di-locked */
parent = dget_parent(dentry);
dir = d_inode(parent);
h_dir = d_inode(h_parent);
AuDebugOn(h_dir != au_h_iptr(dir, bdst));
IMustLock(h_dir);
/* set up a negative lower dentry on bdst, then create the directory */
err = au_lkup_neg(dentry, bdst, /*wh*/0);
if (unlikely(err < 0))
goto out;
h_path.dentry = au_h_dptr(dentry, bdst);
h_path.mnt = au_sbr_mnt(dentry->d_sb, bdst);
err = vfsub_sio_mkdir(au_h_iptr(dir, bdst), &h_path, 0755);
if (unlikely(err))
goto out_put;
au_fset_cpdown(*flags, MADE_DIR);
/* recompute WHED and DIROPQ for this dentry; PARENT_OPQ is only ever set */
bopq = au_dbdiropq(dentry);
au_fclr_cpdown(*flags, WHED);
au_fclr_cpdown(*flags, DIROPQ);
if (au_dbwh(dentry) == bdst)
au_fset_cpdown(*flags, WHED);
if (!au_ftest_cpdown(*flags, PARENT_OPQ) && bopq <= bdst)
au_fset_cpdown(*flags, PARENT_OPQ);
h_inode = d_inode(h_path.dentry);
/* the new directory's inode stays locked for the diropq and attr steps */
inode_lock_nested(h_inode, AuLsc_I_CHILD);
if (au_ftest_cpdown(*flags, WHED)) {
err = au_cpdown_dir_opq(dentry, bdst, flags);
if (unlikely(err)) {
inode_unlock(h_inode);
goto out_dir;
}
}
/* copy the attributes from the dentry's top branch */
err = au_cpdown_attr(&h_path, au_h_dptr(dentry, btop));
inode_unlock(h_inode);
if (unlikely(err))
goto out_opq;
if (au_ftest_cpdown(*flags, WHED)) {
err = au_cpdown_dir_wh(dentry, h_parent, dir, bdst);
if (unlikely(err))
goto out_opq;
}
/* register the new lower inode in the aufs inode */
inode = d_inode(dentry);
if (au_ibbot(inode) < bdst)
au_set_ibbot(inode, bdst);
au_set_h_iptr(inode, bdst, au_igrab(h_inode),
au_hi_flags(inode, /*isdir*/1));
au_fhsm_wrote(dentry->d_sb, bdst, /*force*/0);
goto out; /* success */
/* revert */
out_opq:
if (au_ftest_cpdown(*flags, DIROPQ)) {
inode_lock_nested(h_inode, AuLsc_I_CHILD);
rerr = au_diropq_remove(dentry, bdst);
inode_unlock(h_inode);
if (unlikely(rerr)) {
AuIOErr("failed removing diropq for %pd b%d (%d)\n",
dentry, bdst, rerr);
err = -EIO;
/* the remaining revert steps are skipped in this case */
goto out;
}
}
out_dir:
if (au_ftest_cpdown(*flags, MADE_DIR)) {
rerr = vfsub_sio_rmdir(au_h_iptr(dir, bdst), &h_path);
if (unlikely(rerr)) {
AuIOErr("failed removing %pd b%d (%d)\n",
dentry, bdst, rerr);
err = -EIO;
}
}
out_put:
/* forget the lower dentry on bdst again */
au_set_h_dptr(dentry, bdst, NULL);
if (au_dbbot(dentry) == bdst)
au_update_dbbot(dentry);
out:
dput(parent);
return err;
}
/*
 * Run au_cp_dirs() for @dentry on branch @bdst with au_cpdown_dir() as the
 * per-directory callback.  The AuCpdown_* state bits start out cleared.
 * Returns the value of au_cp_dirs().
 */
int au_cpdown_dirs(struct dentry *dentry, aufs_bindex_t bdst)
{
	unsigned int flags = 0;

	return au_cp_dirs(dentry, bdst, au_cpdown_dir, &flags);
}
/* ---------------------------------------------------------------------- */
/* policies for create */
/*
 * Lower @bindex to the smallest non-negative diropq branch index found among
 * the dentries that au_dcsub_pages_rev_aufs() collects starting from the
 * parent of @dentry.
 * Returns @bindex when no smaller diropq index exists, otherwise that index;
 * or the error of au_dpages_init() / au_dcsub_pages_rev_aufs().
 */
int au_wbr_nonopq(struct dentry *dentry, aufs_bindex_t bindex)
{
	int err, pg, idx, nr;
	aufs_bindex_t bopq;
	struct au_dcsub_pages dpages;
	struct au_dpage *dpage;
	struct dentry **list, *parent, *d;

	err = au_dpages_init(&dpages, GFP_NOFS);
	if (unlikely(err))
		return err;

	parent = dget_parent(dentry);
	err = au_dcsub_pages_rev_aufs(&dpages, parent, /*do_include*/0);
	if (unlikely(err))
		goto out_free;

	err = bindex;
	for (pg = 0; pg < dpages.ndpage; pg++) {
		dpage = &dpages.dpages[pg];
		list = dpage->dentries;
		nr = dpage->ndentry;
		for (idx = 0; idx < nr; idx++) {
			d = list[idx];
			di_read_lock_parent2(d, !AuLock_IR);
			bopq = au_dbdiropq(d);
			di_read_unlock(d, !AuLock_IR);
			if (bopq < 0 || bopq >= err)
				continue;
			err = bopq;
		}
	}

out_free:
	dput(parent);
	au_dpages_free(&dpages);
	return err;
}
/*
 * Starting at @bindex and walking towards index 0, return the first branch
 * index for which au_br_rdonly() is false; -EROFS when there is none.
 */
static int au_wbr_bu(struct super_block *sb, aufs_bindex_t bindex)
{
	while (bindex >= 0) {
		if (!au_br_rdonly(au_sbr(sb, bindex)))
			return bindex;
		bindex--;
	}

	return -EROFS;
}
/*
 * top down parent
 *
 * Pick the branch for a new entry:
 * - the dentry's top branch when it is not read-only;
 * - else the first branch, from the parent's top up to (excluding) the
 *   dentry's top, where the parent exists and which is not read-only;
 * - else bottom-up from the branch just above the dentry's top via
 *   au_wbr_bu(), adjusted by au_wbr_nonopq().
 * Returns the branch index or a negative error.
 */
static int au_wbr_create_tdp(struct dentry *dentry,
			     unsigned int flags __maybe_unused)
{
	int err;
	aufs_bindex_t btop, bindex;
	struct super_block *sb = dentry->d_sb;
	struct dentry *parent, *h_parent;

	btop = au_dbtop(dentry);
	if (!au_br_rdonly(au_sbr(sb, btop))) {
		err = btop;
		goto out;
	}

	err = -EROFS;
	parent = dget_parent(dentry);
	bindex = au_dbtop(parent);
	while (bindex < btop) {
		h_parent = au_h_dptr(parent, bindex);
		if (h_parent && !d_is_negative(h_parent)
		    && !au_br_rdonly(au_sbr(sb, bindex))) {
			err = bindex;
			break;
		}
		bindex++;
	}
	dput(parent);

	/* bottom up here */
	if (unlikely(err < 0)) {
		err = au_wbr_bu(sb, btop - 1);
		if (err >= 0)
			err = au_wbr_nonopq(dentry, err);
	}

out:
	AuDbg("b%d\n", err);
	return err;
}
/* ---------------------------------------------------------------------- */
/*
 * an exception for the policy other than tdp
 *
 * When the dentry has a whiteout branch index and/or its parent has a diropq
 * branch index, the smaller non-negative one of the two is the candidate.
 * The candidate is adjusted by au_wbr_nonopq() and discarded when the
 * resulting branch is read-only.
 * Returns the branch index, or a negative value when the exception does not
 * apply.
 */
static int au_wbr_create_exp(struct dentry *dentry)
{
	int err;
	aufs_bindex_t bwh, bdiropq;
	struct dentry *parent;

	bwh = au_dbwh(dentry);
	parent = dget_parent(dentry);
	bdiropq = au_dbdiropq(parent);

	err = -1;
	if (bwh >= 0 && bdiropq >= 0)
		err = min(bdiropq, bwh);
	else if (bwh >= 0)
		err = bwh;
	else if (bdiropq >= 0)
		err = bdiropq;
	if (err >= 0)
		AuDbg("%d\n", err);
	dput(parent);

	if (err >= 0)
		err = au_wbr_nonopq(dentry, err);
	if (err >= 0 && au_br_rdonly(au_sbr(dentry->d_sb, err)))
		err = -1;

	AuDbg("%d\n", err);
	return err;
}
/* ---------------------------------------------------------------------- */
/*
 * round robin
 *
 * Seed the round-robin counter with the negated result of a bottom-up search
 * (au_wbr_bu()) that starts at the bottom branch.
 * Returns the value of au_wbr_bu().
 */
static int au_wbr_create_init_rr(struct super_block *sb)
{
	int bfirst;

	bfirst = au_wbr_bu(sb, au_sbbot(sb));
	atomic_set(&au_sbi(sb)->si_wbr_rr_next, -bfirst); /* less important */
	/* smp_mb(); */

	AuDbg("b%d\n", bfirst);
	return bfirst;
}
/*
 * Round-robin create policy.
 * au_wbr_create_exp() is consulted first.  Otherwise up to (bottom index + 1)
 * candidates are derived from the shared counter, taken modulo the number of
 * branches as an unsigned value; the first one that is not read-only wins
 * and is adjusted by au_wbr_nonopq().  With the DIR flag set the counter is
 * only read, otherwise it is decremented for every candidate.
 * Returns the branch index, or -EROFS when every candidate is read-only.
 */
static int au_wbr_create_rr(struct dentry *dentry, unsigned int flags)
{
	int err, nbr;
	unsigned int u;
	aufs_bindex_t bindex, bbot;
	struct super_block *sb;
	atomic_t *next;

	err = au_wbr_create_exp(dentry);
	if (err >= 0)
		goto out;

	sb = dentry->d_sb;
	next = &au_sbi(sb)->si_wbr_rr_next;
	bbot = au_sbbot(sb);
	nbr = bbot + 1;
	for (bindex = 0; bindex <= bbot; bindex++) {
		if (au_ftest_wbr(flags, DIR)) {
			err = atomic_read(next);
		} else {
			err = atomic_dec_return(next) + 1;
			/* modulo for 0 is meaningless */
			if (unlikely(!err))
				err = atomic_dec_return(next) + 1;
		}
		AuDbg("%d\n", err);
		u = err;
		err = u % nbr;
		AuDbg("%d\n", err);
		if (!au_br_rdonly(au_sbr(sb, err)))
			break;
		err = -EROFS;
	}

	if (err >= 0)
		err = au_wbr_nonopq(dentry, err);

out:
	AuDbg("%d\n", err);
	return err;
}
/* ---------------------------------------------------------------------- */
/* most free space */
/*
 * Recompute which writable branch has the most available bytes and store the
 * result in the superblock's struct au_wbr_mfs (mfs_bindex, mfs_jiffy,
 * mfsrr_bytes).  The caller must hold mfs->mfs_lock.
 * With @parent, only the branches between the parent's top and
 * au_dbtaildir(parent) where the parent exists are considered; without it,
 * all branches are.
 * When the temporary statfs buffer cannot be allocated, the stored values are
 * left untouched.
 */
static void au_mfs(struct dentry *dentry, struct dentry *parent)
{
struct super_block *sb;
struct au_branch *br;
struct au_wbr_mfs *mfs;
struct dentry *h_parent;
aufs_bindex_t bindex, bbot;
int err;
unsigned long long b, bavail;
struct path h_path;
/* reduce the stack usage */
struct kstatfs *st;
st = kmalloc(sizeof(*st), GFP_NOFS);
if (unlikely(!st)) {
AuWarn1("failed updating mfs(%d), ignored\n", -ENOMEM);
return;
}
bavail = 0;
sb = dentry->d_sb;
mfs = &au_sbi(sb)->si_wbr_mfs;
MtxMustLock(&mfs->mfs_lock);
/* start from "nothing found"; overwritten below when a branch qualifies */
mfs->mfs_bindex = -EROFS;
mfs->mfsrr_bytes = 0;
if (!parent) {
bindex = 0;
bbot = au_sbbot(sb);
} else {
bindex = au_dbtop(parent);
bbot = au_dbtaildir(parent);
}
for (; bindex <= bbot; bindex++) {
if (parent) {
/* skip the branches where the parent does not exist */
h_parent = au_h_dptr(parent, bindex);
if (!h_parent || d_is_negative(h_parent))
continue;
}
br = au_sbr(sb, bindex);
if (au_br_rdonly(br))
continue;
/* sb->s_root for NFS is unreliable */
h_path.mnt = au_br_mnt(br);
h_path.dentry = h_path.mnt->mnt_root;
err = vfs_statfs(&h_path, st);
if (unlikely(err)) {
AuWarn1("failed statfs, b%d, %d\n", bindex, err);
continue;
}
/* when the available size is equal, select the lower one */
BUILD_BUG_ON(sizeof(b) < sizeof(st->f_bavail)
|| sizeof(b) < sizeof(st->f_bsize));
/* available bytes = available blocks * block size */
b = st->f_bavail * st->f_bsize;
br->br_wbr->wbr_bytes = b;
/* ">=" lets a later branch index win a tie */
if (b >= bavail) {
bavail = b;
mfs->mfs_bindex = bindex;
mfs->mfs_jiffy = jiffies;
}
}
mfs->mfsrr_bytes = bavail;
AuDbg("b%d\n", mfs->mfs_bindex);
au_kfree_rcu(st);
}
/*
 * Most-free-space create policy.
 * au_wbr_create_exp() is consulted first.  Otherwise the cached result in
 * the superblock's struct au_wbr_mfs is used, after refreshing it through
 * au_mfs() when it has expired, is negative, or names a read-only branch.
 * With the PARENT flag set, the refresh is restricted to the parent's
 * branches.  The chosen index is adjusted by au_wbr_nonopq().
 * Returns the branch index or a negative error.
 */
static int au_wbr_create_mfs(struct dentry *dentry, unsigned int flags)
{
	int err;
	struct dentry *parent;
	struct super_block *sb;
	struct au_wbr_mfs *mfs;

	err = au_wbr_create_exp(dentry);
	if (err >= 0)
		goto out;

	sb = dentry->d_sb;
	parent = NULL;
	if (au_ftest_wbr(flags, PARENT))
		parent = dget_parent(dentry);
	mfs = &au_sbi(sb)->si_wbr_mfs;
	mutex_lock(&mfs->mfs_lock);
	if (time_after(jiffies, mfs->mfs_jiffy + mfs->mfs_expire)
	    || mfs->mfs_bindex < 0
	    || au_br_rdonly(au_sbr(sb, mfs->mfs_bindex)))
		au_mfs(dentry, parent);
	/*
	 * Sample the result while still holding the lock: au_mfs() resets
	 * mfs_bindex to -EROFS before recomputing it, so an unlocked read
	 * could observe that transient value from a concurrent refresh.
	 */
	err = mfs->mfs_bindex;
	mutex_unlock(&mfs->mfs_lock);
	dput(parent);	/* parent may be NULL */

	if (err >= 0)
		err = au_wbr_nonopq(dentry, err);

out:
	AuDbg("b%d\n", err);
	return err;
}
/*
 * Initialise the most-free-space state of @sb: the mutex, a zero timestamp
 * and "no branch selected" (-EROFS).  Always returns 0.
 */
static int au_wbr_create_init_mfs(struct super_block *sb)
{
	struct au_wbr_mfs *mfs = &au_sbi(sb)->si_wbr_mfs;

	mutex_init(&mfs->mfs_lock);
	mfs->mfs_jiffy = 0;
	mfs->mfs_bindex = -EROFS;

	return 0;
}
/* destroy the mutex of the most-free-space state of @sb; always returns 0 */
static int au_wbr_create_fin_mfs(struct super_block *sb __maybe_unused)
{
	struct au_wbr_mfs *mfs = &au_sbi(sb)->si_wbr_mfs;

	mutex_destroy(&mfs->mfs_lock);
	return 0;
}
/* ---------------------------------------------------------------------- */
/*
 * top down regardless parent, and then mfs
 *
 * The most-free-space state is refreshed when it has expired or is negative.
 * Then the branches from index 0 down to a limit are scanned for the first
 * writable one whose recorded free bytes exceed the watermark.  The limit is
 * the parent's au_dbtaildir(), lowered to the dentry's whiteout index when
 * that is smaller, and adjusted by au_wbr_nonopq().  When no branch
 * qualifies, the most-free-space branch is used.
 * Returns the branch index or a negative error.
 */
static int au_wbr_create_tdmfs(struct dentry *dentry,
			       unsigned int flags __maybe_unused)
{
	int err;
	aufs_bindex_t bwh, blimit, bindex, bmfs;
	unsigned long long watermark;
	struct super_block *sb = dentry->d_sb;
	struct au_wbr_mfs *mfs = &au_sbi(sb)->si_wbr_mfs;
	struct au_branch *br;
	struct dentry *parent;

	mutex_lock(&mfs->mfs_lock);
	if (time_after(jiffies, mfs->mfs_jiffy + mfs->mfs_expire)
	    || mfs->mfs_bindex < 0)
		au_mfs(dentry, /*parent*/NULL);
	watermark = mfs->mfsrr_watermark;
	bmfs = mfs->mfs_bindex;
	mutex_unlock(&mfs->mfs_lock);

	/* another style of au_wbr_create_exp() */
	bwh = au_dbwh(dentry);
	parent = dget_parent(dentry);
	blimit = au_dbtaildir(parent);
	if (bwh >= 0 && bwh < blimit)
		blimit = bwh;

	err = au_wbr_nonopq(dentry, blimit);
	if (unlikely(err < 0))
		goto out;
	blimit = err;

	/* fall back to the most-free-space branch unless the scan finds one */
	err = bmfs;
	for (bindex = 0; bindex <= blimit; bindex++) {
		br = au_sbr(sb, bindex);
		if (!au_br_rdonly(br) && br->br_wbr->wbr_bytes > watermark) {
			err = bindex;
			break;
		}
	}

out:
	dput(parent);
	AuDbg("b%d\n", err);
	return err;
}
/* ---------------------------------------------------------------------- */
/*
 * most free space and then round robin
 *
 * Start with au_wbr_create_mfs().  When that succeeds but the recorded
 * maximum of free bytes is below the watermark, the round-robin policy
 * decides instead (called while mfs_lock is held).
 * Returns the branch index or a negative error.
 */
static int au_wbr_create_mfsrr(struct dentry *dentry, unsigned int flags)
{
	int err;
	struct au_wbr_mfs *mfs;

	err = au_wbr_create_mfs(dentry, flags);
	if (err < 0)
		goto out;

	mfs = &au_sbi(dentry->d_sb)->si_wbr_mfs;
	mutex_lock(&mfs->mfs_lock);
	if (mfs->mfsrr_bytes < mfs->mfsrr_watermark)
		err = au_wbr_create_rr(dentry, flags);
	mutex_unlock(&mfs->mfs_lock);

out:
	AuDbg("b%d\n", err);
	return err;
}
/*
 * Initialise both the most-free-space and the round-robin state of @sb.
 * Returns the value of au_wbr_create_init_rr().
 */
static int au_wbr_create_init_mfsrr(struct super_block *sb)
{
	au_wbr_create_init_mfs(sb); /* ignore */
	return au_wbr_create_init_rr(sb);
}
/* ---------------------------------------------------------------------- */
/* top down parent and most free space */
static int au_wbr_create_pmfs(struct dentry *dentry, unsigned int flags)
{
int err, e2;
unsigned long long b;
aufs_bindex_t bindex, btop, bbot;
struct super_block *sb;
struct dentry *parent, *h_parent;
struct au_branch *br;
err = au_wbr_create_tdp(dentry, flags);
if (unlikely(err < 0))
goto out;
parent = dget_parent(dentry);
btop = au_dbtop(parent);
bbot = au_dbtaildir(parent);
if (btop == bbot)
goto out_parent; /* success */
e2 = au_wbr_create_mfs(dentry, flags);
if (e2 < 0)
goto out_parent; /* success */
/* when the available size is equal, select upper one */
sb = dentry->d_sb;
br = au_sbr(sb, err);
b = br->br_wbr->wbr_bytes;
AuDbg("b%d, %llu\n", err, b);
for (bindex = btop; bindex <= bbot; bindex++) {
h_parent = au_h_dptr(parent, bindex);
if (!h_parent || d_is_negative(h_parent))
continue;
br = au_sbr(sb, bindex);
if (!au_br_rdonly(br) && br->br_wbr->wbr_bytes > b) {
b = br->br_wbr->wbr_bytes;
err = bindex;
AuDbg("b%d, %llu\n", err, b);
}
}
if (err >= 0)
err = au_wbr_nonopq(dentry, err);
out_parent:
dput(parent);
out:
AuDbg("b%d\n", err);
return err;
}
/* ---------------------------------------------------------------------- */
/*
 * - top down parent
 * - most free space with parent
 * - most free space round-robin regardless parent
 *
 * Start from au_wbr_create_pmfs() with the PARENT flag added.  When the
 * chosen branch's recorded free bytes are below the watermark, defer to
 * au_wbr_create_mfsrr() with the caller's original flags.
 * Returns the branch index or a negative error.
 */
static int au_wbr_create_pmfsrr(struct dentry *dentry, unsigned int flags)
{
	int err;
	unsigned long long watermark;
	struct super_block *sb;
	struct au_branch *br;
	struct au_wbr_mfs *mfs;

	err = au_wbr_create_pmfs(dentry, flags | AuWbr_PARENT);
	if (unlikely(err < 0))
		goto out;

	sb = dentry->d_sb;
	br = au_sbr(sb, err);
	mfs = &au_sbi(sb)->si_wbr_mfs;

	mutex_lock(&mfs->mfs_lock);
	watermark = mfs->mfsrr_watermark;
	mutex_unlock(&mfs->mfs_lock);

	/* regardless the parent dir */
	if (br->br_wbr->wbr_bytes < watermark)
		err = au_wbr_create_mfsrr(dentry, flags);

out:
	AuDbg("b%d\n", err);
	return err;
}
/* ---------------------------------------------------------------------- */
/* policies for copyup */
/*
 * top down parent
 *
 * The copy-up variant simply reuses the create policy; that function does
 * not look at its flags argument.
 */
static int au_wbr_copyup_tdp(struct dentry *dentry)
{
	const unsigned int any_flags = 0; /* anything is ok */

	return au_wbr_create_tdp(dentry, any_flags);
}
/*
 * bottom up parent
 *
 * Walk from the dentry's top branch towards the parent's top branch and pick
 * the first branch where the parent exists and which is not read-only.  When
 * none qualifies, continue bottom-up with au_wbr_bu() from the branch just
 * above the parent's top.
 * Returns the branch index or -EROFS.
 */
static int au_wbr_copyup_bup(struct dentry *dentry)
{
	int err = -EROFS;
	aufs_bindex_t bindex, btop;
	struct dentry *parent, *h_parent;
	struct super_block *sb = dentry->d_sb;

	parent = dget_parent(dentry);
	btop = au_dbtop(parent);
	bindex = au_dbtop(dentry);
	while (bindex >= btop) {
		h_parent = au_h_dptr(parent, bindex);
		if (h_parent && !d_is_negative(h_parent)
		    && !au_br_rdonly(au_sbr(sb, bindex))) {
			err = bindex;
			break;
		}
		bindex--;
	}
	dput(parent);

	/* bottom up here */
	if (unlikely(err < 0))
		err = au_wbr_bu(sb, btop - 1);

	AuDbg("b%d\n", err);
	return err;
}
/*
 * bottom up
 *
 * Return the first non-read-only branch found by au_wbr_bu() when walking
 * from @btop towards index 0, or -EROFS.
 * NOTE(review): au_wbr_bu() never returns an index above its starting point,
 * so the au_wbr_nonopq() adjustment below looks unreachable -- confirm the
 * intended condition.
 */
int au_wbr_do_copyup_bu(struct dentry *dentry, aufs_bindex_t btop)
{
	int bfound;

	bfound = au_wbr_bu(dentry->d_sb, btop);
	AuDbg("b%d\n", bfound);
	if (bfound > btop)
		bfound = au_wbr_nonopq(dentry, bfound);

	AuDbg("b%d\n", bfound);
	return bfound;
}
/* bottom-up copy-up policy, starting at the dentry's own top branch */
static int au_wbr_copyup_bu(struct dentry *dentry)
{
	return au_wbr_do_copyup_bu(dentry, au_dbtop(dentry));
}
/* ---------------------------------------------------------------------- */
/* copy-up policy table, indexed by the AuWbrCopyup_* constants */
struct au_wbr_copyup_operations au_wbr_copyup_ops[] = {
[AuWbrCopyup_TDP] = {
.copyup = au_wbr_copyup_tdp
},
[AuWbrCopyup_BUP] = {
.copyup = au_wbr_copyup_bup
},
[AuWbrCopyup_BU] = {
.copyup = au_wbr_copyup_bu
}
};
/*
 * create policy table, indexed by the AuWbrCreate_* constants.
 * Each "V" variant shares the callbacks of its non-V counterpart, and every
 * policy other than TDP and RR uses au_wbr_create_fin_mfs as its .fin.
 */
struct au_wbr_create_operations au_wbr_create_ops[] = {
	[AuWbrCreate_TDP] = {
		.create	= au_wbr_create_tdp,
	},
	[AuWbrCreate_RR] = {
		.create	= au_wbr_create_rr,
		.init	= au_wbr_create_init_rr,
	},
	[AuWbrCreate_MFS] = {
		.create	= au_wbr_create_mfs,
		.init	= au_wbr_create_init_mfs,
		.fin	= au_wbr_create_fin_mfs,
	},
	[AuWbrCreate_MFSV] = {
		.create	= au_wbr_create_mfs,
		.init	= au_wbr_create_init_mfs,
		.fin	= au_wbr_create_fin_mfs,
	},
	[AuWbrCreate_MFSRR] = {
		.create	= au_wbr_create_mfsrr,
		.init	= au_wbr_create_init_mfsrr,
		.fin	= au_wbr_create_fin_mfs,
	},
	[AuWbrCreate_MFSRRV] = {
		.create	= au_wbr_create_mfsrr,
		.init	= au_wbr_create_init_mfsrr,
		.fin	= au_wbr_create_fin_mfs,
	},
	[AuWbrCreate_TDMFS] = {
		.create	= au_wbr_create_tdmfs,
		.init	= au_wbr_create_init_mfs,
		.fin	= au_wbr_create_fin_mfs,
	},
	[AuWbrCreate_TDMFSV] = {
		.create	= au_wbr_create_tdmfs,
		.init	= au_wbr_create_init_mfs,
		.fin	= au_wbr_create_fin_mfs,
	},
	[AuWbrCreate_PMFS] = {
		.create	= au_wbr_create_pmfs,
		.init	= au_wbr_create_init_mfs,
		.fin	= au_wbr_create_fin_mfs,
	},
	[AuWbrCreate_PMFSV] = {
		.create	= au_wbr_create_pmfs,
		.init	= au_wbr_create_init_mfs,
		.fin	= au_wbr_create_fin_mfs,
	},
	[AuWbrCreate_PMFSRR] = {
		.create	= au_wbr_create_pmfsrr,
		.init	= au_wbr_create_init_mfsrr,
		.fin	= au_wbr_create_fin_mfs,
	},
	[AuWbrCreate_PMFSRRV] = {
		.create	= au_wbr_create_pmfsrr,
		.init	= au_wbr_create_init_mfsrr,
		.fin	= au_wbr_create_fin_mfs,
	},
};

1062
fs/aufs/whout.c Normal file

File diff suppressed because it is too large Load Diff

86
fs/aufs/whout.h Normal file
View File

@ -0,0 +1,86 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2005-2020 Junjiro R. Okajima
*
* This program, aufs is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* whiteout for logical deletion and opaque directory
*/
#ifndef __AUFS_WHOUT_H__
#define __AUFS_WHOUT_H__
#ifdef __KERNEL__
#include "dir.h"
/* whout.c */
/*
 * Prototypes for functions defined in whout.c.
 * struct au_branch is only forward-declared below because these prototypes
 * use nothing but pointers to it.
 */
int au_wh_name_alloc(struct qstr *wh, const struct qstr *name);
int au_wh_test(struct dentry *h_parent, struct qstr *wh_name, int try_sio);
int au_diropq_test(struct dentry *h_dentry);
struct au_branch;
struct dentry *au_whtmp_lkup(struct dentry *h_parent, struct au_branch *br,
struct qstr *prefix);
int au_whtmp_ren(struct dentry *h_dentry, struct au_branch *br);
int au_wh_unlink_dentry(struct inode *h_dir, struct path *h_path,
struct dentry *dentry);
int au_wh_init(struct au_branch *br, struct super_block *sb);
/* diropq flags */
/*
 * AuDiropq_CREATE is the only flag bit defined here.
 * The test/set/clear helpers paste the short flag name onto the AuDiropq_
 * prefix, e.g. au_ftest_diropq(flags, CREATE) tests AuDiropq_CREATE.
 */
#define AuDiropq_CREATE 1
#define au_ftest_diropq(flags, name) ((flags) & AuDiropq_##name)
#define au_fset_diropq(flags, name) \
do { (flags) |= AuDiropq_##name; } while (0)
#define au_fclr_diropq(flags, name) \
do { (flags) &= ~AuDiropq_##name; } while (0)
/*
 * au_diropq_sio() takes the diropq flags defined above; the inline wrappers
 * au_diropq_create()/au_diropq_remove() in this header call it with and
 * without AuDiropq_CREATE respectively.
 */
struct dentry *au_diropq_sio(struct dentry *dentry, aufs_bindex_t bindex,
unsigned int flags);
struct dentry *au_wh_lkup(struct dentry *h_parent, struct qstr *base_name,
struct au_branch *br);
struct dentry *au_wh_create(struct dentry *dentry, aufs_bindex_t bindex,
struct dentry *h_parent);
/* real rmdir for the whiteout-ed dir */
/*
 * Argument bundle allocated by au_whtmp_rmdir_alloc(), released by
 * au_whtmp_rmdir_free() and handed to au_whtmp_kick_rmdir().
 * NOTE(review): the field roles are not visible in this header; presumably
 * dir/br/wh_dentry identify the directory being removed and whlist holds its
 * whiteout names -- confirm against whout.c.
 */
struct au_whtmp_rmdir {
struct inode *dir;
struct au_branch *br;
struct dentry *wh_dentry;
struct au_nhash whlist;
};
/* allocation, release and use of struct au_whtmp_rmdir (defined in whout.c) */
struct au_whtmp_rmdir *au_whtmp_rmdir_alloc(struct super_block *sb, gfp_t gfp);
void au_whtmp_rmdir_free(struct au_whtmp_rmdir *whtmp);
int au_whtmp_rmdir(struct inode *dir, aufs_bindex_t bindex,
struct dentry *wh_dentry, struct au_nhash *whlist);
void au_whtmp_kick_rmdir(struct inode *dir, aufs_bindex_t bindex,
struct dentry *wh_dentry, struct au_whtmp_rmdir *args);
/* ---------------------------------------------------------------------- */
/* call au_diropq_sio() with the AuDiropq_CREATE flag set */
static inline struct dentry *au_diropq_create(struct dentry *dentry,
					      aufs_bindex_t bindex)
{
	const unsigned int flags = AuDiropq_CREATE;

	return au_diropq_sio(dentry, bindex, flags);
}
/*
 * call au_diropq_sio() without AuDiropq_CREATE and reduce the returned
 * pointer to an integer via PTR_ERR().
 */
static inline int au_diropq_remove(struct dentry *dentry, aufs_bindex_t bindex)
{
	struct dentry *res;

	res = au_diropq_sio(dentry, bindex, !AuDiropq_CREATE);
	return PTR_ERR(res);
}
#endif /* __KERNEL__ */
#endif /* __AUFS_WHOUT_H__ */

372
fs/aufs/wkq.c Normal file
View File

@ -0,0 +1,372 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2005-2020 Junjiro R. Okajima
*
* This program, aufs is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* workqueue for asynchronous/super-io operations
* todo: try new credential scheme
*/
#include <linux/module.h>
#include "aufs.h"
/*
 * internal workqueue named AUFS_WKQ_NAME.
 * created in au_wkq_init(), destroyed in au_wkq_fin(); only the waiting jobs
 * are queued here (see au_wkq_run()).
 */
static struct workqueue_struct *au_wkq;
/*
 * One delegated job.
 * It lives on the caller's stack for waiting jobs (au_wkq_do_wait()) and is
 * kmalloc-ed for no-wait jobs (au_wkq_nowait()), in which case wkq_func()
 * frees it.
 */
struct au_wkinfo {
struct work_struct wk; /* the work item handed to the workqueue */
struct kobject *kobj; /* no-wait only: reference held until the job ends */
unsigned int flags; /* see wkq.h */
au_wkq_func_t func; /* the job itself, called as func(args) */
void *args;
#ifdef CONFIG_LOCKDEP
int dont_check; /* set from lockdep_recursing() at submit time */
struct held_lock **hlock; /* NULL-terminated array of the submitter's locks */
#endif
struct completion *comp; /* waiting jobs only: completed by wkq_func() */
};
/* ---------------------------------------------------------------------- */
/*
* Aufs passes some operations to the workqueue such as the internal copyup.
* This scheme looks rather unnatural for LOCKDEP debugging feature, since the
* job run by workqueue depends upon the locks acquired in the other task.
* Delegating a small operation to the workqueue, aufs passes its lockdep
* information too. And the job in the workqueue restores the info in order to
* pretend as if it acquired those locks. This is just to make LOCKDEP work
* correctly and expectedly.
*/
#ifndef CONFIG_LOCKDEP
/*
 * Without CONFIG_LOCKDEP the lockdep hooks are stubbed out.
 * NOTE(review): AuStubInt0/AuStubVoid are defined elsewhere; presumably they
 * expand to empty inline functions, the Int0 flavour returning 0 -- confirm.
 */
AuStubInt0(au_wkq_lockdep_alloc, struct au_wkinfo *wkinfo);
AuStubVoid(au_wkq_lockdep_free, struct au_wkinfo *wkinfo);
AuStubVoid(au_wkq_lockdep_pre, struct au_wkinfo *wkinfo);
AuStubVoid(au_wkq_lockdep_post, struct au_wkinfo *wkinfo);
AuStubVoid(au_wkq_lockdep_init, struct au_wkinfo *wkinfo);
#else
/* reset the lockdep fields of a freshly allocated wkinfo */
static void au_wkq_lockdep_init(struct au_wkinfo *wkinfo)
{
	wkinfo->dont_check = 0;
	wkinfo->hlock = NULL;
}
/*
 * Test whether a held lock belongs to one of the four aufs rwsem classes
 * listed in a[] below, learning each class key the first time it is seen.
 *
 * Every slot starts with a NULL key. When a lock whose name is found inside
 * the slot's name (strstr(a[i].name, name)) shows up, its key is stored in
 * the slot and 'set' is bumped. Once 'set' equals ARRAY_SIZE(a) all keys are
 * known and the lookup compares keys only, without taking the spinlock.
 *
 * 1: matched
 * 0: unmatched
 */
static int au_wkq_lockdep_test(struct lock_class_key *key, const char *name)
{
static DEFINE_SPINLOCK(spin);
static struct {
char *name;
struct lock_class_key *key;
} a[] = {
{ .name = "&sbinfo->si_rwsem" },
{ .name = "&finfo->fi_rwsem" },
{ .name = "&dinfo->di_rwsem" },
{ .name = "&iinfo->ii_rwsem" }
};
static int set;
int i;
/* lockless read from 'set.' see below */
if (set == ARRAY_SIZE(a)) {
/* fast path: every key is already recorded */
for (i = 0; i < ARRAY_SIZE(a); i++)
if (a[i].key == key)
goto match;
goto unmatch;
}
spin_lock(&spin);
/* some keys are known already, try them first */
if (set)
for (i = 0; i < ARRAY_SIZE(a); i++)
if (a[i].key == key) {
spin_unlock(&spin);
goto match;
}
for (i = 0; i < ARRAY_SIZE(a); i++) {
if (a[i].key) {
if (unlikely(a[i].key == key)) { /* rare but possible */
spin_unlock(&spin);
goto match;
} else
continue;
}
/* an unfilled slot: claim it when the lock name fits */
if (strstr(a[i].name, name)) {
/*
* the order of these three lines is important for the
* lockless read above.
* NOTE(review): 'set' is incremented after the spinlock is
* released, so concurrent increments are not serialized by
* it -- confirm this is acceptable.
*/
a[i].key = key;
spin_unlock(&spin);
set++;
/* AuDbg("%d, %s\n", set, name); */
goto match;
}
}
spin_unlock(&spin);
goto unmatch;
match:
return 1;
unmatch:
return 0;
}
/*
 * Snapshot the aufs locks held by the current task into wkinfo->hlock as a
 * NULL-terminated array, so that the worker can replay them for lockdep.
 *
 * Nothing is allocated when lockdep is recursing (dont_check gets set) or
 * when the task holds no lock at all.
 * Returns 0 on success, -ENOMEM when the array cannot be allocated.
 */
static int au_wkq_lockdep_alloc(struct au_wkinfo *wkinfo)
{
	int i, depth;
	struct task_struct *curr = current;
	struct held_lock **dst, *src;

	wkinfo->dont_check = lockdep_recursing(curr);
	if (wkinfo->dont_check)
		return 0;

	depth = curr->lockdep_depth;
	if (!depth)
		return 0;

	/* one extra slot for the terminating NULL */
	wkinfo->hlock = kmalloc_array(depth + 1, sizeof(*wkinfo->hlock),
				      GFP_NOFS);
	if (unlikely(!wkinfo->hlock))
		return -ENOMEM;

#if 0 /* left for debugging */
	if (0 && au_debug_test())
		lockdep_print_held_locks(curr);
#endif
	dst = wkinfo->hlock;
	src = curr->held_locks;
	for (i = 0; i < depth; i++, src++)
		if (au_wkq_lockdep_test(src->instance->key,
					src->instance->name))
			*dst++ = src;
	*dst = NULL;

	return 0;
}
/*
 * release the held-lock array built by au_wkq_lockdep_alloc().
 * wkinfo->hlock may still be NULL here when nothing was allocated.
 */
static void au_wkq_lockdep_free(struct au_wkinfo *wkinfo)
{
au_kfree_try_rcu(wkinfo->hlock);
}
/*
 * Run in the worker just before the job: switch lockdep off when the
 * submitter asked for it, then tell lockdep that every recorded lock is
 * acquired (read or write, following the recorded 'read' field).
 */
static void au_wkq_lockdep_pre(struct au_wkinfo *wkinfo)
{
	struct held_lock *held, **cursor;
	int subclass;

	if (wkinfo->dont_check)
		lockdep_off();

	cursor = wkinfo->hlock;
	if (!cursor)
		return;

	for (; *cursor; cursor++) {
		held = *cursor;
		subclass = lockdep_hlock_class(held)->subclass;
		/* AuDbg("%s, %d\n", p->instance->name, subclass); */
		if (held->read)
			rwsem_acquire_read(held->instance, subclass, 0,
					   /*p->acquire_ip*/_RET_IP_);
		else
			rwsem_acquire(held->instance, subclass, 0,
				      /*p->acquire_ip*/_RET_IP_);
	}
}
/*
 * Run in the worker right after the job: switch lockdep back on when
 * au_wkq_lockdep_pre() switched it off, then release every recorded lock
 * from lockdep's point of view.
 */
static void au_wkq_lockdep_post(struct au_wkinfo *wkinfo)
{
	struct held_lock *held, **cursor;

	if (wkinfo->dont_check)
		lockdep_on();

	cursor = wkinfo->hlock;
	if (!cursor)
		return;

	for (; *cursor; cursor++) {
		held = *cursor;
		rwsem_release(held->instance, 0, /*p->acquire_ip*/_RET_IP_);
	}
}
#endif
/*
 * The work handler: run the delegated job between the lockdep pre/post
 * hooks, then either wake the waiter (WAIT) or, for a no-wait job, drop the
 * kobject and module references and free the wkinfo.
 */
static void wkq_func(struct work_struct *wk)
{
	struct au_wkinfo *wkinfo;

	wkinfo = container_of(wk, struct au_wkinfo, wk);

	AuDebugOn(!uid_eq(current_fsuid(), GLOBAL_ROOT_UID));
	AuDebugOn(rlimit(RLIMIT_FSIZE) != RLIM_INFINITY);

	au_wkq_lockdep_pre(wkinfo);
	wkinfo->func(wkinfo->args);
	au_wkq_lockdep_post(wkinfo);

	if (au_ftest_wkq(wkinfo->flags, WAIT)) {
		/* the submitter owns wkinfo and is sleeping on comp */
		complete(wkinfo->comp);
		return;
	}

	/* no-wait job: nobody else will clean up */
	kobject_put(wkinfo->kobj);
	module_put(THIS_MODULE); /* todo: ?? */
	au_kfree_rcu(wkinfo);
}
/*
 * Since struct completion is large, try allocating it dynamically.
 * Note that the macro ignores its 'name' parameter and always declares a
 * pointer called 'comp', initialized to NULL.
 */
#define AuWkqCompDeclare(name) struct completion *comp = NULL
/*
 * Allocate and initialize a completion, storing it both in *comp and in
 * wkinfo->comp. On allocation failure *comp is NULL and -ENOMEM is returned.
 */
static int au_wkq_comp_alloc(struct au_wkinfo *wkinfo, struct completion **comp)
{
	struct completion *c;

	c = kmalloc(sizeof(*c), GFP_NOFS);
	*comp = c;
	if (!c)
		return -ENOMEM;

	init_completion(c);
	wkinfo->comp = c;
	return 0;
}
/* release the completion obtained from au_wkq_comp_alloc() */
static void au_wkq_comp_free(struct completion *comp)
{
au_kfree_rcu(comp);
}
/*
 * Submit the job.
 * A waiting job uses an on-stack work item on the aufs private workqueue; a
 * no-wait job goes to the system workqueue via schedule_work().
 * With NEST set, running from within a workqueue worker only produces a
 * warning; without NEST, au_dbg_verify_kthread() is called instead.
 */
static void au_wkq_run(struct au_wkinfo *wkinfo)
{
	if (!au_ftest_wkq(wkinfo->flags, NEST))
		au_dbg_verify_kthread();
	else if (au_wkq_test()) {
		AuWarn1("wkq from wkq, unless silly-rename on NFS,"
			" due to a dead dir by UDBA,"
			" or async xino write?\n");
		AuDebugOn(au_ftest_wkq(wkinfo->flags, WAIT));
	}

	if (!au_ftest_wkq(wkinfo->flags, WAIT)) {
		INIT_WORK(&wkinfo->wk, wkq_func);
		schedule_work(&wkinfo->wk);
		return;
	}

	INIT_WORK_ONSTACK(&wkinfo->wk, wkq_func);
	queue_work(au_wkq, &wkinfo->wk);
}
/*
 * Run func(args) in the aufs workqueue and sleep until it has finished.
 *
 * Be careful. It is easy to make deadlock happen.
 * processA: lock, wkq and wait
 * processB: wkq and wait, lock in wkq
 * --> deadlock
 *
 * Returns 0 once the job has run, or the error from allocating the
 * completion or the lockdep snapshot (in which case the job is not run).
 */
int au_wkq_do_wait(unsigned int flags, au_wkq_func_t func, void *args)
{
	int err;
	AuWkqCompDeclare(comp);
	struct au_wkinfo wkinfo = {
		.flags	= flags,
		.func	= func,
		.args	= args
	};

	err = au_wkq_comp_alloc(&wkinfo, &comp);
	if (likely(!err)) {
		err = au_wkq_lockdep_alloc(&wkinfo);
		if (likely(!err)) {
			au_wkq_run(&wkinfo);
			/* no timeout, no interrupt */
			wait_for_completion(wkinfo.comp);
			au_wkq_lockdep_free(&wkinfo);
		}
		au_wkq_comp_free(comp);
	}

	destroy_work_on_stack(&wkinfo.wk);
	return err;
}
/*
 * Queue func(args) and return without waiting for it.
 *
 * The superblock's pending-job counter (si_nowait.nw_len) is raised first;
 * when the wkinfo cannot be allocated it is dropped again via au_nwt_done()
 * and -ENOMEM is returned. On success a reference to the sbinfo kobject and
 * to this module is taken for the lifetime of the job; wkq_func() drops
 * them.
 *
 * Note: dget/dput() in func for aufs dentries are not supported. It will be a
 * problem in a concurrent umounting.
 */
int au_wkq_nowait(au_wkq_func_t func, void *args, struct super_block *sb,
		  unsigned int flags)
{
	struct au_wkinfo *wkinfo;

	atomic_inc(&au_sbi(sb)->si_nowait.nw_len);

	/*
	 * wkq_func() must free this wkinfo.
	 * it highly depends upon the implementation of workqueue.
	 */
	wkinfo = kmalloc(sizeof(*wkinfo), GFP_NOFS);
	if (!wkinfo) {
		au_nwt_done(&au_sbi(sb)->si_nowait);
		return -ENOMEM;
	}

	wkinfo->kobj = &au_sbi(sb)->si_kobj;
	/* a no-wait job never carries the WAIT flag */
	wkinfo->flags = flags & ~AuWkq_WAIT;
	wkinfo->func = func;
	wkinfo->args = args;
	wkinfo->comp = NULL;
	au_wkq_lockdep_init(wkinfo);
	kobject_get(wkinfo->kobj);
	__module_get(THIS_MODULE); /* todo: ?? */

	au_wkq_run(wkinfo);
	return 0;
}
/* ---------------------------------------------------------------------- */
/* start with no pending no-wait job and an empty wait queue */
void au_nwt_init(struct au_nowait_tasks *nwt)
{
	atomic_set(&nwt->nw_len, 0);
	/* smp_mb(); */ /* atomic_set */
	init_waitqueue_head(&nwt->nw_wq);
}
/* tear down the workqueue created by au_wkq_init() */
void au_wkq_fin(void)
{
	destroy_workqueue(au_wkq);
}
/*
 * Create the aufs private workqueue.
 * Returns 0 on success; an error-pointer result is converted with PTR_ERR()
 * and a NULL result is reported as -ENOMEM.
 */
int __init au_wkq_init(void)
{
	au_wkq = alloc_workqueue(AUFS_WKQ_NAME, 0, WQ_DFL_ACTIVE);
	if (IS_ERR(au_wkq))
		return PTR_ERR(au_wkq);
	if (!au_wkq)
		return -ENOMEM;

	return 0;
}

89
fs/aufs/wkq.h Normal file
View File

@ -0,0 +1,89 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
* Copyright (C) 2005-2020 Junjiro R. Okajima
*
* This program, aufs is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* workqueue for asynchronous/super-io operations
* todo: try new credentials management scheme
*/
#ifndef __AUFS_WKQ_H__
#define __AUFS_WKQ_H__
#ifdef __KERNEL__
#include <linux/wait.h>
struct super_block;
/* ---------------------------------------------------------------------- */
/*
 * in the next operation, wait for the 'nowait' tasks in system-wide workqueue
 *
 * nw_len counts the pending tasks: au_nwt_done() decrements it and wakes
 * nw_wq when it reaches zero, au_nwt_flush() sleeps on nw_wq until it is
 * zero.
 */
struct au_nowait_tasks {
atomic_t nw_len;
wait_queue_head_t nw_wq;
};
/* ---------------------------------------------------------------------- */
/* signature of a delegated job: one opaque argument, no return value */
typedef void (*au_wkq_func_t)(void *args);
/* wkq flags */
/*
 * Two independent bits, AuWkq_WAIT and AuWkq_NEST.
 * The test/set/clear helpers paste the short name onto the AuWkq_ prefix,
 * e.g. au_ftest_wkq(flags, WAIT) tests AuWkq_WAIT.
 */
#define AuWkq_WAIT 1
#define AuWkq_NEST (1 << 1)
#define au_ftest_wkq(flags, name) ((flags) & AuWkq_##name)
#define au_fset_wkq(flags, name) \
do { (flags) |= AuWkq_##name; } while (0)
#define au_fclr_wkq(flags, name) \
do { (flags) &= ~AuWkq_##name; } while (0)
/* wkq.c */
/* prototypes for the functions defined in wkq.c */
int au_wkq_do_wait(unsigned int flags, au_wkq_func_t func, void *args);
int au_wkq_nowait(au_wkq_func_t func, void *args, struct super_block *sb,
unsigned int flags);
void au_nwt_init(struct au_nowait_tasks *nwt);
int __init au_wkq_init(void);
void au_wkq_fin(void);
/* ---------------------------------------------------------------------- */
static inline int au_wkq_test(void)
{
return current->flags & PF_WQ_WORKER;
}
/* au_wkq_do_wait() with only the WAIT flag set */
static inline int au_wkq_wait(au_wkq_func_t func, void *args)
{
	const unsigned int flags = AuWkq_WAIT;

	return au_wkq_do_wait(flags, func, args);
}
/* one pending task is over; wake every waiter when it was the last one */
static inline void au_nwt_done(struct au_nowait_tasks *nwt)
{
	if (!atomic_dec_and_test(&nwt->nw_len))
		return;

	wake_up_all(&nwt->nw_wq);
}
/* sleep until the pending-task counter reads zero; always returns 0 */
static inline int au_nwt_flush(struct au_nowait_tasks *nwt)
{
	wait_event(nwt->nw_wq, !atomic_read(&nwt->nw_len));

	return 0;
}
#endif /* __KERNEL__ */
#endif /* __AUFS_WKQ_H__ */

356
fs/aufs/xattr.c Normal file
View File

@ -0,0 +1,356 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2014-2020 Junjiro R. Okajima
*
* This program, aufs is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/*
* handling xattr functions
*/
#include <linux/fs.h>
#include <linux/posix_acl_xattr.h>
#include <linux/xattr.h>
#include "aufs.h"
static int au_xattr_ignore(int err, char *name, unsigned int ignore_flags)
{
if (!ignore_flags)
goto out;
switch (err) {
case -ENOMEM:
case -EDQUOT:
goto out;
}
if ((ignore_flags & AuBrAttr_ICEX) == AuBrAttr_ICEX) {
err = 0;
goto out;
}
#define cmp(brattr, prefix) do { \
if (!strncmp(name, XATTR_##prefix##_PREFIX, \
XATTR_##prefix##_PREFIX_LEN)) { \
if (ignore_flags & AuBrAttr_ICEX_##brattr) \
err = 0; \
goto out; \
} \
} while (0)
cmp(SEC, SECURITY);
cmp(SYS, SYSTEM);
cmp(TR, TRUSTED);
cmp(USR, USER);
#undef cmp
if (ignore_flags & AuBrAttr_ICEX_OTH)
err = 0;
out:
return err;
}
/*
 * an extra flag bit, one position above AuBrAttr_ICEX_OTH.
 * au_cpup_xattr() ORs it into ignore_flags before copying the POSIX ACL
 * names that did not appear in the listing, and au_do_cpup_xattr() then
 * ignores -EOPNOTSUPP from the read when it is set.
 */
static const int au_xattr_out_of_list = AuBrAttr_ICEX_OTH << 1;
/*
 * Copy a single extended attribute @name from @h_src to @h_dst.
 *
 * The value is read with vfs_getxattr_alloc() into *@buf, which the caller
 * passes again on the next call and frees at the end.
 * A read result of zero or below stops here: -ENODATA is treated as success,
 * and so is -EOPNOTSUPP when either the out-of-list flag is set or the
 * source is a no-ACL NFS inode and @name is one of the POSIX ACL names.
 * A failure to set the attribute is filtered through au_xattr_ignore().
 *
 * Returns 0 or a negative errno.
 */
static int au_do_cpup_xattr(struct dentry *h_dst, struct dentry *h_src,
char *name, char **buf, unsigned int ignore_flags,
unsigned int verbose)
{
int err;
ssize_t ssz;
struct inode *h_idst;
ssz = vfs_getxattr_alloc(h_src, name, buf, 0, GFP_NOFS);
err = ssz;
if (unlikely(err <= 0)) {
if (err == -ENODATA
|| (err == -EOPNOTSUPP
&& ((ignore_flags & au_xattr_out_of_list)
|| (au_test_nfs_noacl(d_inode(h_src))
&& (!strcmp(name, XATTR_NAME_POSIX_ACL_ACCESS)
|| !strcmp(name,
XATTR_NAME_POSIX_ACL_DEFAULT))))
))
err = 0;
if (err && (verbose || au_debug_test()))
pr_err("%s, err %d\n", name, err);
goto out;
}
/* unlock it temporarily */
/* the destination inode lock is dropped around the set and re-taken after */
h_idst = d_inode(h_dst);
inode_unlock(h_idst);
err = vfsub_setxattr(h_dst, name, *buf, ssz, /*flags*/0);
inode_lock_nested(h_idst, AuLsc_I_CHILD2);
if (unlikely(err)) {
if (verbose || au_debug_test())
pr_err("%s, err %d\n", name, err);
err = au_xattr_ignore(err, name, ignore_flags);
}
out:
return err;
}
/*
 * Copy every extended attribute of @h_src to @h_dst.
 *
 * The destination inode is unlocked first so that the source inode can be
 * share-locked before the destination is locked again; the destination is
 * still locked on return. NOTE(review): this implies the caller enters with
 * the destination inode locked -- confirm against the callers.
 *
 * The attribute names are listed under the source lock, which is dropped
 * before the values are copied one by one. The two POSIX ACL names are
 * copied explicitly afterwards when they did not show up in the listing.
 *
 * Returns 0 or a negative errno; -ENODATA and -EOPNOTSUPP from the sizing
 * listxattr call are treated as success.
 */
int au_cpup_xattr(struct dentry *h_dst, struct dentry *h_src, int ignore_flags,
unsigned int verbose)
{
int err, unlocked, acl_access, acl_default;
ssize_t ssz;
struct inode *h_isrc, *h_idst;
char *value, *p, *o, *e;
/* try stopping to update the source inode while we are referencing */
/* there should not be the parent-child relationship between them */
h_isrc = d_inode(h_src);
h_idst = d_inode(h_dst);
inode_unlock(h_idst);
inode_lock_shared_nested(h_isrc, AuLsc_I_CHILD);
inode_lock_nested(h_idst, AuLsc_I_CHILD2);
/* 'unlocked' tracks whether the shared lock on h_isrc was already dropped */
unlocked = 0;
/* some filesystems don't list POSIX ACL, for example tmpfs */
/* first call only asks for the size of the name list */
ssz = vfs_listxattr(h_src, NULL, 0);
err = ssz;
if (unlikely(err < 0)) {
AuTraceErr(err);
if (err == -ENODATA
|| err == -EOPNOTSUPP)
err = 0; /* ignore */
goto out;
}
err = 0;
p = NULL;
o = NULL;
if (ssz) {
err = -ENOMEM;
p = kmalloc(ssz, GFP_NOFS);
/* 'o' keeps the start of the buffer for freeing, 'p' walks through it */
o = p;
if (unlikely(!p))
goto out;
err = vfs_listxattr(h_src, p, ssz);
}
inode_unlock_shared(h_isrc);
unlocked = 1;
AuDbg("err %d, ssz %zd\n", err, ssz);
if (unlikely(err < 0))
goto out_free;
err = 0;
e = p + ssz;
value = NULL;
acl_access = 0;
acl_default = 0;
/* the list is a sequence of NUL-terminated names */
while (!err && p < e) {
acl_access |= !strncmp(p, XATTR_NAME_POSIX_ACL_ACCESS,
sizeof(XATTR_NAME_POSIX_ACL_ACCESS) - 1);
acl_default |= !strncmp(p, XATTR_NAME_POSIX_ACL_DEFAULT,
sizeof(XATTR_NAME_POSIX_ACL_DEFAULT)
- 1);
err = au_do_cpup_xattr(h_dst, h_src, p, &value, ignore_flags,
verbose);
p += strlen(p) + 1;
}
AuTraceErr(err);
/* from here on the names being copied were not part of the listing */
ignore_flags |= au_xattr_out_of_list;
if (!err && !acl_access) {
err = au_do_cpup_xattr(h_dst, h_src,
XATTR_NAME_POSIX_ACL_ACCESS, &value,
ignore_flags, verbose);
AuTraceErr(err);
}
if (!err && !acl_default) {
err = au_do_cpup_xattr(h_dst, h_src,
XATTR_NAME_POSIX_ACL_DEFAULT, &value,
ignore_flags, verbose);
AuTraceErr(err);
}
au_kfree_try_rcu(value);
out_free:
au_kfree_try_rcu(o);
out:
if (!unlocked)
inode_unlock_shared(h_isrc);
AuTraceErr(err);
return err;
}
/* ---------------------------------------------------------------------- */
/*
 * Tell whether the xattr call is a re-entry from within aufs itself.
 * Only meaningful when SMACK or SELinux is enabled; otherwise always 0.
 */
static int au_smack_reentering(struct super_block *sb)
{
	int reenter = 0;

#if IS_ENABLED(CONFIG_SECURITY_SMACK) || IS_ENABLED(CONFIG_SECURITY_SELINUX)
	/*
	 * as a part of lookup, smack_d_instantiate() is called, and it calls
	 * i_op->getxattr(). ouch.
	 */
	reenter = si_pid_test(sb);
#endif
	return reenter;
}
/* request kinds for au_lgxattr(); selects the active member of its union */
enum {
AU_XATTR_LIST,
AU_XATTR_GET
};
/*
 * arguments of a list/get xattr request.
 * 'type' is AU_XATTR_LIST or AU_XATTR_GET and tells which union member is
 * valid: u.list for listxattr, u.get for getxattr.
 */
struct au_lgxattr {
int type;
union {
struct {
char *list;
size_t size;
} list;
struct {
const char *name;
void *value;
size_t size;
} get;
} u;
};
/*
 * Common body of listxattr and getxattr: resolve the lower path via
 * au_h_path_getattr() and forward the request to it.
 *
 * When au_smack_reentering() reports a re-entry, the superblock read lock is
 * neither taken nor released here, and the dentry read unlock is skipped as
 * well.
 *
 * Returns what vfs_listxattr()/vfs_getxattr() return, or an error from the
 * locking or lookup steps. When no lower dentry is available the function
 * returns the 0 left in 'err' by the successful lookup.
 */
static ssize_t au_lgxattr(struct dentry *dentry, struct inode *inode,
struct au_lgxattr *arg)
{
ssize_t err;
int reenter;
struct path h_path;
struct super_block *sb;
sb = dentry->d_sb;
reenter = au_smack_reentering(sb);
if (!reenter) {
err = si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLM);
if (unlikely(err))
goto out;
}
err = au_h_path_getattr(dentry, inode, /*force*/1, &h_path, reenter);
if (unlikely(err))
goto out_si;
if (unlikely(!h_path.dentry))
/* illegally overlapped or something */
goto out_di; /* pretending success */
/* always topmost entry only */
switch (arg->type) {
case AU_XATTR_LIST:
err = vfs_listxattr(h_path.dentry,
arg->u.list.list, arg->u.list.size);
break;
case AU_XATTR_GET:
AuDebugOn(d_is_negative(h_path.dentry));
err = vfs_getxattr(h_path.dentry,
arg->u.get.name, arg->u.get.value,
arg->u.get.size);
break;
}
/* unwind in the reverse order: dentry lock first, then superblock lock */
out_di:
if (!reenter)
di_read_unlock(dentry, AuLock_IR);
out_si:
if (!reenter)
si_read_unlock(sb);
out:
AuTraceErr(err);
return err;
}
/* listxattr entry: package the arguments and hand them to au_lgxattr() */
ssize_t aufs_listxattr(struct dentry *dentry, char *list, size_t size)
{
	struct au_lgxattr arg = {
		.type		= AU_XATTR_LIST,
		.u.list.list	= list,
		.u.list.size	= size
	};

	return au_lgxattr(dentry, /*inode*/NULL, &arg);
}
/* getxattr helper: package the arguments and hand them to au_lgxattr() */
static ssize_t au_getxattr(struct dentry *dentry, struct inode *inode,
			   const char *name, void *value, size_t size)
{
	struct au_lgxattr arg = {
		.type		= AU_XATTR_GET,
		.u.get.name	= name,
		.u.get.value	= value,
		.u.get.size	= size
	};

	return au_lgxattr(dentry, inode, &arg);
}
/* setxattr helper: package the arguments and hand them to au_sxattr() */
static int au_setxattr(struct dentry *dentry, struct inode *inode,
		       const char *name, const void *value, size_t size,
		       int flags)
{
	struct au_sxattr arg = {
		.type		= AU_XATTR_SET,
		.u.set.name	= name,
		.u.set.value	= value,
		.u.set.size	= size,
		.u.set.flags	= flags
	};

	return au_sxattr(dentry, inode, &arg);
}
/* ---------------------------------------------------------------------- */
/* xattr_handler ->get callback; the handler argument itself is not used */
static int au_xattr_get(const struct xattr_handler *handler,
			struct dentry *dentry, struct inode *inode,
			const char *name, void *buffer, size_t size)
{
	ssize_t ret;

	ret = au_getxattr(dentry, inode, name, buffer, size);
	return ret;
}
/* xattr_handler ->set callback; the handler argument itself is not used */
static int au_xattr_set(const struct xattr_handler *handler,
			struct dentry *dentry, struct inode *inode,
			const char *name, const void *value, size_t size,
			int flags)
{
	int err;

	err = au_setxattr(dentry, inode, name, value, size, flags);
	return err;
}
/*
 * the aufs xattr handler, with empty name and prefix strings.
 * NOTE(review): an empty prefix presumably makes this handler match every
 * attribute name, which would be why it must come last in
 * au_xattr_handlers[] -- confirm against the VFS xattr handler lookup.
 */
static const struct xattr_handler au_xattr_handler = {
.name = "",
.prefix = "",
.get = au_xattr_get,
.set = au_xattr_set
};
/*
 * NULL-terminated handler list installed by au_xattr_init().
 * The two POSIX ACL handlers are present only with CONFIG_FS_POSIX_ACL and
 * precede the aufs handler.
 */
static const struct xattr_handler *au_xattr_handlers[] = {
#ifdef CONFIG_FS_POSIX_ACL
&posix_acl_access_xattr_handler,
&posix_acl_default_xattr_handler,
#endif
&au_xattr_handler, /* must be last */
NULL
};
/* install the aufs xattr handler list on the superblock */
void au_xattr_init(struct super_block *sb)
{
	sb->s_xattr = au_xattr_handlers;
}

1966
fs/aufs/xino.c Normal file

File diff suppressed because it is too large Load Diff

132
fs/compat.c Normal file
View File

@ -0,0 +1,132 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* linux/fs/compat.c
*
* Kernel compatibility routines for e.g. 32 bit syscall support
* on 64 bit kernels.
*
* Copyright (C) 2002 Stephen Rothwell, IBM Corporation
* Copyright (C) 1997-2000 Jakub Jelinek (jakub@redhat.com)
* Copyright (C) 1998 Eddie C. Dost (ecd@skynet.be)
* Copyright (C) 2001,2002 Andi Kleen, SuSE Labs
* Copyright (C) 2003 Pavel Machek (pavel@ucw.cz)
*/
#include <linux/compat.h>
#include <linux/nfs4_mount.h>
#include <linux/syscalls.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include "internal.h"
/* 32-bit layout of an NFS string: a length and a 32-bit user pointer */
struct compat_nfs_string {
compat_uint_t len;
compat_uptr_t data;
};
/*
 * Convert one compat NFS string into its native form.
 * do_nfs4_super_data_conv() calls this with @dst and @src pointing into the
 * same buffer, so the order of the two stores below should not be changed
 * without re-checking the overlap.
 */
static inline void compat_nfs_string(struct nfs_string *dst,
struct compat_nfs_string *src)
{
dst->data = compat_ptr(src->data);
dst->len = src->len;
}
/*
 * 32-bit layout of the version 1 NFSv4 mount data: every pointer is a
 * compat_uptr_t and every string a struct compat_nfs_string.
 * do_nfs4_super_data_conv() converts it field by field into
 * struct nfs4_mount_data.
 */
struct compat_nfs4_mount_data_v1 {
compat_int_t version;
compat_int_t flags;
compat_int_t rsize;
compat_int_t wsize;
compat_int_t timeo;
compat_int_t retrans;
compat_int_t acregmin;
compat_int_t acregmax;
compat_int_t acdirmin;
compat_int_t acdirmax;
struct compat_nfs_string client_addr;
struct compat_nfs_string mnt_path;
struct compat_nfs_string hostname;
compat_uint_t host_addrlen;
compat_uptr_t host_addr;
compat_int_t proto;
compat_int_t auth_flavourlen;
compat_uptr_t auth_flavours;
};
/*
 * Convert version 1 NFSv4 mount data from the compat layout to the native
 * layout, in place: 'raw' and 'real' both point at @raw_data.
 *
 * The fields are assigned from the last one to the first one.
 * NOTE(review): presumably because the native fields sit at the same or a
 * higher offset than their compat counterparts, so going backwards never
 * overwrites a compat field that is still to be read -- confirm before
 * reordering anything here.
 *
 * Any other version is left untouched. Always returns 0.
 */
static int do_nfs4_super_data_conv(void *raw_data)
{
int version = *(compat_uint_t *) raw_data;
if (version == 1) {
struct compat_nfs4_mount_data_v1 *raw = raw_data;
struct nfs4_mount_data *real = raw_data;
/* copy the fields backwards */
real->auth_flavours = compat_ptr(raw->auth_flavours);
real->auth_flavourlen = raw->auth_flavourlen;
real->proto = raw->proto;
real->host_addr = compat_ptr(raw->host_addr);
real->host_addrlen = raw->host_addrlen;
compat_nfs_string(&real->hostname, &raw->hostname);
compat_nfs_string(&real->mnt_path, &raw->mnt_path);
compat_nfs_string(&real->client_addr, &raw->client_addr);
real->acdirmax = raw->acdirmax;
real->acdirmin = raw->acdirmin;
real->acregmax = raw->acregmax;
real->acregmin = raw->acregmin;
real->retrans = raw->retrans;
real->timeo = raw->timeo;
real->wsize = raw->wsize;
real->rsize = raw->rsize;
real->flags = raw->flags;
real->version = raw->version;
}
return 0;
}
#define NFS4_NAME	"nfs4"

/*
 * compat mount(2).
 *
 * Copies the type string, the device name and the option data into the
 * kernel, converts the option data in place when the type is "nfs4", and
 * calls do_mount(). Everything copied in is freed before returning.
 *
 * Returns the result of do_mount(), the error from one of the copies, or
 * -EINVAL when the nfs4 conversion reports a failure.
 */
COMPAT_SYSCALL_DEFINE5(mount, const char __user *, dev_name,
		       const char __user *, dir_name,
		       const char __user *, type, compat_ulong_t, flags,
		       const void __user *, data)
{
	char *kernel_type, *kernel_dev;
	void *options;
	int retval;

	kernel_type = copy_mount_string(type);
	if (IS_ERR(kernel_type)) {
		retval = PTR_ERR(kernel_type);
		goto out;
	}

	kernel_dev = copy_mount_string(dev_name);
	if (IS_ERR(kernel_dev)) {
		retval = PTR_ERR(kernel_dev);
		goto out1;
	}

	options = copy_mount_options(data);
	if (IS_ERR(options)) {
		retval = PTR_ERR(options);
		goto out2;
	}

	/* only nfs4 mount data needs a layout conversion */
	if (kernel_type && options
	    && !strcmp(kernel_type, NFS4_NAME)
	    && do_nfs4_super_data_conv(options)) {
		retval = -EINVAL;
		goto out3;
	}

	retval = do_mount(kernel_dev, dir_name, kernel_type, flags, options);

out3:
	kfree(options);
out2:
	kfree(kernel_dev);
out1:
	kfree(kernel_type);
out:
	return retval;
}

1097
fs/compat_ioctl.c Normal file

File diff suppressed because it is too large Load Diff

120
fs/quota/compat.c Normal file
View File

@ -0,0 +1,120 @@
// SPDX-License-Identifier: GPL-2.0
#include <linux/syscalls.h>
#include <linux/compat.h>
#include <linux/quotaops.h>
/*
 * This code works only for 32 bit quota tools over 64 bit OS (x86_64, ia64)
 * and is necessary due to alignment problems.
 */
/*
 * 32-bit view of struct if_dqblk. The compat quotactl copies
 * sizeof(struct compat_if_dqblk) bytes in one go and then transfers
 * dqb_valid separately.
 */
struct compat_if_dqblk {
compat_u64 dqb_bhardlimit;
compat_u64 dqb_bsoftlimit;
compat_u64 dqb_curspace;
compat_u64 dqb_ihardlimit;
compat_u64 dqb_isoftlimit;
compat_u64 dqb_curinodes;
compat_u64 dqb_btime;
compat_u64 dqb_itime;
compat_uint_t dqb_valid;
};
/* XFS structures */
/*
 * 32-bit view of the per-quota-file part of the XFS quota statistics.
 * NOTE(review): the first field is named dqb_bhardlimit like the one in
 * struct compat_if_dqblk; the native counterpart is not visible here, so
 * check whether that name is intended.
 */
struct compat_fs_qfilestat {
compat_u64 dqb_bhardlimit;
compat_u64 qfs_nblks;
compat_uint_t qfs_nextents;
};
/*
 * 32-bit view of struct fs_quota_stat, filled in by the Q_XGETQSTAT branch
 * of the compat quotactl in four steps: the leading fields up to qs_uquota,
 * then qs_uquota, then qs_gquota, then everything from qs_incoredqs on.
 */
struct compat_fs_quota_stat {
__s8 qs_version;
__u16 qs_flags;
__s8 qs_pad;
struct compat_fs_qfilestat qs_uquota;
struct compat_fs_qfilestat qs_gquota;
compat_uint_t qs_incoredqs;
compat_int_t qs_btimelimit;
compat_int_t qs_itimelimit;
compat_int_t qs_rtbtimelimit;
__u16 qs_bwarnlimit;
__u16 qs_iwarnlimit;
};
/*
 * compat quotactl(2).
 *
 * Q_GETQUOTA, Q_SETQUOTA and Q_XGETQSTAT use a native-layout structure
 * obtained from compat_alloc_user_space() and translate between it and the
 * caller's compat structure with copy_in_user()/get_user()/put_user().
 * Every other command is passed straight to kernel_quotactl().
 *
 * Returns the result of kernel_quotactl(), or -EFAULT when one of the user
 * space copies fails.
 */
COMPAT_SYSCALL_DEFINE4(quotactl32, unsigned int, cmd,
const char __user *, special, qid_t, id,
void __user *, addr)
{
unsigned int cmds;
struct if_dqblk __user *dqblk;
struct compat_if_dqblk __user *compat_dqblk;
struct fs_quota_stat __user *fsqstat;
struct compat_fs_quota_stat __user *compat_fsqstat;
compat_uint_t data;
u16 xdata;
long ret;
/* the sub-command lives in the bits above SUBCMDSHIFT */
cmds = cmd >> SUBCMDSHIFT;
switch (cmds) {
case Q_GETQUOTA:
dqblk = compat_alloc_user_space(sizeof(struct if_dqblk));
compat_dqblk = addr;
ret = kernel_quotactl(cmd, special, id, dqblk);
if (ret)
break;
/* native -> compat: bulk copy, then dqb_valid on its own */
if (copy_in_user(compat_dqblk, dqblk, sizeof(*compat_dqblk)) ||
get_user(data, &dqblk->dqb_valid) ||
put_user(data, &compat_dqblk->dqb_valid))
ret = -EFAULT;
break;
case Q_SETQUOTA:
dqblk = compat_alloc_user_space(sizeof(struct if_dqblk));
compat_dqblk = addr;
ret = -EFAULT;
/* compat -> native: bulk copy, then dqb_valid on its own */
if (copy_in_user(dqblk, compat_dqblk, sizeof(*compat_dqblk)) ||
get_user(data, &compat_dqblk->dqb_valid) ||
put_user(data, &dqblk->dqb_valid))
break;
ret = kernel_quotactl(cmd, special, id, dqblk);
break;
case Q_XGETQSTAT:
fsqstat = compat_alloc_user_space(sizeof(struct fs_quota_stat));
compat_fsqstat = addr;
ret = kernel_quotactl(cmd, special, id, fsqstat);
if (ret)
break;
/* assume failure until all four pieces have been copied */
ret = -EFAULT;
/* Copying qs_version, qs_flags, qs_pad */
if (copy_in_user(compat_fsqstat, fsqstat,
offsetof(struct compat_fs_quota_stat, qs_uquota)))
break;
/* Copying qs_uquota */
if (copy_in_user(&compat_fsqstat->qs_uquota,
&fsqstat->qs_uquota,
sizeof(compat_fsqstat->qs_uquota)) ||
get_user(data, &fsqstat->qs_uquota.qfs_nextents) ||
put_user(data, &compat_fsqstat->qs_uquota.qfs_nextents))
break;
/* Copying qs_gquota */
if (copy_in_user(&compat_fsqstat->qs_gquota,
&fsqstat->qs_gquota,
sizeof(compat_fsqstat->qs_gquota)) ||
get_user(data, &fsqstat->qs_gquota.qfs_nextents) ||
put_user(data, &compat_fsqstat->qs_gquota.qfs_nextents))
break;
/* Copying the rest */
/* i.e. from qs_incoredqs to the end of the compat structure */
if (copy_in_user(&compat_fsqstat->qs_incoredqs,
&fsqstat->qs_incoredqs,
sizeof(struct compat_fs_quota_stat) -
offsetof(struct compat_fs_quota_stat, qs_incoredqs)) ||
get_user(xdata, &fsqstat->qs_iwarnlimit) ||
put_user(xdata, &compat_fsqstat->qs_iwarnlimit))
break;
ret = 0;
break;
default:
ret = kernel_quotactl(cmd, special, id, addr);
}
return ret;
}

View File

@ -0,0 +1,888 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (c) 2000,2002,2005 Silicon Graphics, Inc.
* Copyright (c) 2013 Red Hat, Inc.
* All Rights Reserved.
*/
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_dir2.h"
/*
* Shortform directory ops
*/
/*
 * Size in bytes of a shortform entry with a name of @len bytes:
 * the fixed part of the entry, the name, and an inode number whose width
 * depends on the header's i8count.
 */
static int
xfs_dir2_sf_entsize(
	struct xfs_dir2_sf_hdr	*hdr,
	int			len)
{
	int			fixed;
	int			inosize;

	fixed = sizeof(struct xfs_dir2_sf_entry);	/* namelen + offset */
	inosize = hdr->i8count ? XFS_INO64_SIZE : XFS_INO32_SIZE; /* ino # */

	return fixed + len + inosize;
}
/* same as xfs_dir2_sf_entsize() plus one extra uint8_t per entry */
static int
xfs_dir3_sf_entsize(
	struct xfs_dir2_sf_hdr	*hdr,
	int			len)
{
	int			base = xfs_dir2_sf_entsize(hdr, len);

	return base + sizeof(uint8_t);
}
/* step over @sfep using the dir2 entry size for its name length */
static struct xfs_dir2_sf_entry *
xfs_dir2_sf_nextentry(
	struct xfs_dir2_sf_hdr	*hdr,
	struct xfs_dir2_sf_entry *sfep)
{
	char			*next;

	next = (char *)sfep + xfs_dir2_sf_entsize(hdr, sfep->namelen);
	return (struct xfs_dir2_sf_entry *)next;
}
/* step over @sfep using the dir3 entry size for its name length */
static struct xfs_dir2_sf_entry *
xfs_dir3_sf_nextentry(
	struct xfs_dir2_sf_hdr	*hdr,
	struct xfs_dir2_sf_entry *sfep)
{
	char			*next;

	next = (char *)sfep + xfs_dir3_sf_entsize(hdr, sfep->namelen);
	return (struct xfs_dir2_sf_entry *)next;
}
/*
* For filetype enabled shortform directories, the file type field is stored at
* the end of the name. Because it's only a single byte, endian conversion is
* not necessary. For non-filetype enable directories, the type is always
* unknown and we never store the value.
*/
/* dir2 entries store no file type: the answer is always "unknown" */
static uint8_t
xfs_dir2_sfe_get_ftype(
	struct xfs_dir2_sf_entry *sfep)
{
	const uint8_t		ftype = XFS_DIR3_FT_UNKNOWN;

	return ftype;
}
/* dir2 entries store no file type: only sanity-check the value */
static void
xfs_dir2_sfe_put_ftype(
	struct xfs_dir2_sf_entry *sfep,
	uint8_t			ftype)
{
	ASSERT(ftype < XFS_DIR3_FT_MAX);
}
/*
 * Read the file type byte stored right after the name.
 * Values at or above XFS_DIR3_FT_MAX are reported as "unknown".
 */
static uint8_t
xfs_dir3_sfe_get_ftype(
	struct xfs_dir2_sf_entry *sfep)
{
	uint8_t			ftype = sfep->name[sfep->namelen];

	return ftype < XFS_DIR3_FT_MAX ? ftype : XFS_DIR3_FT_UNKNOWN;
}
/* store the file type in the byte right after the name */
static void
xfs_dir3_sfe_put_ftype(
	struct xfs_dir2_sf_entry *sfep,
	uint8_t			ftype)
{
	uint8_t			*slot;

	ASSERT(ftype < XFS_DIR3_FT_MAX);

	slot = &sfep->name[sfep->namelen];
	*slot = ftype;
}
/*
* Inode numbers in short-form directories can come in two versions,
* either 4 bytes or 8 bytes wide. These helpers deal with the
* two forms transparently by looking at the headers i8count field.
*
* For 64-bit inode number the most significant byte must be zero.
*/
/*
 * Read a big-endian inode number from an unaligned location.
 * With i8count set it is 8 bytes wide and the most significant byte is
 * masked off; otherwise it is 4 bytes wide.
 */
static xfs_ino_t
xfs_dir2_sf_get_ino(
	struct xfs_dir2_sf_hdr	*hdr,
	uint8_t			*from)
{
	if (!hdr->i8count)
		return get_unaligned_be32(from);

	return get_unaligned_be64(from) & 0x00ffffffffffffffULL;
}
/*
 * Write an inode number in big-endian form to an unaligned location,
 * 8 bytes wide with i8count set and 4 bytes wide otherwise.
 * The most significant byte of @ino is asserted to be zero.
 */
static void
xfs_dir2_sf_put_ino(
	struct xfs_dir2_sf_hdr	*hdr,
	uint8_t			*to,
	xfs_ino_t		ino)
{
	ASSERT((ino & 0xff00000000000000ULL) == 0);

	if (!hdr->i8count) {
		put_unaligned_be32(ino, to);
		return;
	}
	put_unaligned_be64(ino, to);
}
/* Decode the inode number held in the shortform header's parent field. */
static xfs_ino_t
xfs_dir2_sf_get_parent_ino(
	struct xfs_dir2_sf_hdr	*hdr)
{
	uint8_t	*raw = hdr->parent;

	return xfs_dir2_sf_get_ino(hdr, raw);
}
/* Encode an inode number into the shortform header's parent field. */
static void
xfs_dir2_sf_put_parent_ino(
	struct xfs_dir2_sf_hdr	*hdr,
	xfs_ino_t		ino)
{
	uint8_t	*raw = hdr->parent;

	xfs_dir2_sf_put_ino(hdr, raw, ino);
}
/*
* In short-form directory entries the inode numbers are stored at variable
* offset behind the entry name. If the entry stores a filetype value, then it
* sits between the name and the inode number. Hence the inode numbers may only
* be accessed through the helpers below.
*/
/* Non-ftype variant: the inode number starts right after the name bytes. */
static xfs_ino_t
xfs_dir2_sfe_get_ino(
	struct xfs_dir2_sf_hdr		*hdr,
	struct xfs_dir2_sf_entry	*sfep)
{
	return xfs_dir2_sf_get_ino(hdr, sfep->name + sfep->namelen);
}
/* Non-ftype variant: write the inode number right after the name bytes. */
static void
xfs_dir2_sfe_put_ino(
	struct xfs_dir2_sf_hdr		*hdr,
	struct xfs_dir2_sf_entry	*sfep,
	xfs_ino_t			ino)
{
	xfs_dir2_sf_put_ino(hdr, sfep->name + sfep->namelen, ino);
}
/*
 * Ftype variant: one byte follows the name before the inode number, so the
 * inode number is read from one position further along.
 */
static xfs_ino_t
xfs_dir3_sfe_get_ino(
	struct xfs_dir2_sf_hdr		*hdr,
	struct xfs_dir2_sf_entry	*sfep)
{
	return xfs_dir2_sf_get_ino(hdr, sfep->name + sfep->namelen + 1);
}
/*
 * Ftype variant: one byte follows the name before the inode number, so the
 * inode number is written one position further along.
 */
static void
xfs_dir3_sfe_put_ino(
	struct xfs_dir2_sf_hdr		*hdr,
	struct xfs_dir2_sf_entry	*sfep,
	xfs_ino_t			ino)
{
	xfs_dir2_sf_put_ino(hdr, sfep->name + sfep->namelen + 1, ino);
}
/*
* Directory data block operations
*/
/*
* For special situations, the dirent size ends up fixed because we always know
* what the size of the entry is. That's true for the "." and "..", and
* therefore we know that they are a fixed size and hence their offsets are
* constant, as is the first entry.
*
* Hence, this calculation is written as a macro to be able to be calculated at
* compile time and so certain offsets can be calculated directly in the
* structure initialiser via the macro. There are two macros - one for dirents
* with ftype and without so there are no unresolvable conditionals in the
* calculations. We also use round_up() as XFS_DIR2_DATA_ALIGN is always a power
* of 2 and the compiler doesn't reject it (unlike roundup()).
*/
/*
 * Size of a data entry with an n-byte name and no file type byte: the fixed
 * fields up to name[0], the name, and one xfs_dir2_data_off_t, rounded up to
 * XFS_DIR2_DATA_ALIGN.
 */
#define XFS_DIR2_DATA_ENTSIZE(n) \
round_up((offsetof(struct xfs_dir2_data_entry, name[0]) + (n) + \
sizeof(xfs_dir2_data_off_t)), XFS_DIR2_DATA_ALIGN)
/*
 * Same calculation with sizeof(uint8_t) added for the file type byte before
 * rounding.
 */
#define XFS_DIR3_DATA_ENTSIZE(n) \
round_up((offsetof(struct xfs_dir2_data_entry, name[0]) + (n) + \
sizeof(xfs_dir2_data_off_t) + sizeof(uint8_t)), \
XFS_DIR2_DATA_ALIGN)
/* Function form of XFS_DIR2_DATA_ENTSIZE() for use through the ops table. */
static int
xfs_dir2_data_entsize(
	int	n)
{
	int	size = XFS_DIR2_DATA_ENTSIZE(n);

	return size;
}
/* Function form of XFS_DIR3_DATA_ENTSIZE() for use through the ops table. */
static int
xfs_dir3_data_entsize(
	int	n)
{
	int	size = XFS_DIR3_DATA_ENTSIZE(n);

	return size;
}
/* Non-ftype variant: the entry is never inspected, the type is always unknown. */
static uint8_t
xfs_dir2_data_get_ftype(
	struct xfs_dir2_data_entry	*dep)
{
	const uint8_t	ftype = XFS_DIR3_FT_UNKNOWN;

	return ftype;
}
/* Non-ftype variant: nothing is written to the entry. */
static void
xfs_dir2_data_put_ftype(
	struct xfs_dir2_data_entry	*dep,
	uint8_t				ftype)
{
	/* Only sanity-check the caller's value; dep is left untouched. */
	ASSERT(ftype < XFS_DIR3_FT_MAX);
}
/*
 * Read the file type byte stored directly after a data entry's name.
 * Values at or beyond XFS_DIR3_FT_MAX are reported as unknown.
 */
static uint8_t
xfs_dir3_data_get_ftype(
	struct xfs_dir2_data_entry	*dep)
{
	uint8_t	raw = dep->name[dep->namelen];

	return (raw < XFS_DIR3_FT_MAX) ? raw : XFS_DIR3_FT_UNKNOWN;
}
/* Store the file type in the byte directly after a data entry's name. */
static void
xfs_dir3_data_put_ftype(
	struct xfs_dir2_data_entry	*dep,
	uint8_t				type)
{
	ASSERT(type < XFS_DIR3_FT_MAX);
	ASSERT(dep->namelen != 0);

	*(dep->name + dep->namelen) = type;
}
/*
* Pointer to an entry's tag word.
*/
static __be16 *
xfs_dir2_data_entry_tag_p(
struct xfs_dir2_data_entry *dep)
{
return (__be16 *)((char *)dep +
xfs_dir2_data_entsize(dep->namelen) - sizeof(__be16));
}
static __be16 *
xfs_dir3_data_entry_tag_p(
struct xfs_dir2_data_entry *dep)
{
return (__be16 *)((char *)dep +
xfs_dir3_data_entsize(dep->namelen) - sizeof(__be16));
}
/*
* location of . and .. in data space (always block 0)
*/
/* "." sits immediately after the dir2 data block header. */
static struct xfs_dir2_data_entry *
xfs_dir2_data_dot_entry_p(
	struct xfs_dir2_data_hdr	*hdr)
{
	char	*p = (char *)hdr;

	p += sizeof(struct xfs_dir2_data_hdr);
	return (struct xfs_dir2_data_entry *)p;
}
/* ".." follows the dir2 header and the one-character "." entry. */
static struct xfs_dir2_data_entry *
xfs_dir2_data_dotdot_entry_p(
	struct xfs_dir2_data_hdr	*hdr)
{
	char	*p = (char *)hdr;

	p += sizeof(struct xfs_dir2_data_hdr);
	p += XFS_DIR2_DATA_ENTSIZE(1);
	return (struct xfs_dir2_data_entry *)p;
}
/* First entry after the dir2 header, "." (1 char) and ".." (2 chars). */
static struct xfs_dir2_data_entry *
xfs_dir2_data_first_entry_p(
	struct xfs_dir2_data_hdr	*hdr)
{
	char	*p = (char *)hdr;

	p += sizeof(struct xfs_dir2_data_hdr);
	p += XFS_DIR2_DATA_ENTSIZE(1);
	p += XFS_DIR2_DATA_ENTSIZE(2);
	return (struct xfs_dir2_data_entry *)p;
}
/* ".." follows the dir2 header and an ftype-sized one-character "." entry. */
static struct xfs_dir2_data_entry *
xfs_dir2_ftype_data_dotdot_entry_p(
	struct xfs_dir2_data_hdr	*hdr)
{
	char	*p = (char *)hdr;

	p += sizeof(struct xfs_dir2_data_hdr);
	p += XFS_DIR3_DATA_ENTSIZE(1);
	return (struct xfs_dir2_data_entry *)p;
}
/* First entry after the dir2 header and ftype-sized "." and ".." entries. */
static struct xfs_dir2_data_entry *
xfs_dir2_ftype_data_first_entry_p(
	struct xfs_dir2_data_hdr	*hdr)
{
	char	*p = (char *)hdr;

	p += sizeof(struct xfs_dir2_data_hdr);
	p += XFS_DIR3_DATA_ENTSIZE(1);
	p += XFS_DIR3_DATA_ENTSIZE(2);
	return (struct xfs_dir2_data_entry *)p;
}
/* "." sits immediately after the dir3 data block header. */
static struct xfs_dir2_data_entry *
xfs_dir3_data_dot_entry_p(
	struct xfs_dir2_data_hdr	*hdr)
{
	char	*p = (char *)hdr;

	p += sizeof(struct xfs_dir3_data_hdr);
	return (struct xfs_dir2_data_entry *)p;
}
/* ".." follows the dir3 header and an ftype-sized one-character "." entry. */
static struct xfs_dir2_data_entry *
xfs_dir3_data_dotdot_entry_p(
	struct xfs_dir2_data_hdr	*hdr)
{
	char	*p = (char *)hdr;

	p += sizeof(struct xfs_dir3_data_hdr);
	p += XFS_DIR3_DATA_ENTSIZE(1);
	return (struct xfs_dir2_data_entry *)p;
}
/* First entry after the dir3 header and ftype-sized "." and ".." entries. */
static struct xfs_dir2_data_entry *
xfs_dir3_data_first_entry_p(
	struct xfs_dir2_data_hdr	*hdr)
{
	char	*p = (char *)hdr;

	p += sizeof(struct xfs_dir3_data_hdr);
	p += XFS_DIR3_DATA_ENTSIZE(1);
	p += XFS_DIR3_DATA_ENTSIZE(2);
	return (struct xfs_dir2_data_entry *)p;
}
/* Start of the bestfree table in a dir2 data block header. */
static struct xfs_dir2_data_free *
xfs_dir2_data_bestfree_p(
	struct xfs_dir2_data_hdr	*hdr)
{
	return &hdr->bestfree[0];
}
/*
 * Start of the best_free table in a dir3 data block header. The caller's
 * pointer is reinterpreted as the dir3 header layout.
 */
static struct xfs_dir2_data_free *
xfs_dir3_data_bestfree_p(
	struct xfs_dir2_data_hdr	*hdr)
{
	struct xfs_dir3_data_hdr	*hdr3 = (struct xfs_dir3_data_hdr *)hdr;

	return hdr3->best_free;
}
/* Address just past the dir2 data header, viewed as a data entry. */
static struct xfs_dir2_data_entry *
xfs_dir2_data_entry_p(
	struct xfs_dir2_data_hdr	*hdr)
{
	char	*p = (char *)hdr;

	p += sizeof(struct xfs_dir2_data_hdr);
	return (struct xfs_dir2_data_entry *)p;
}
/* Address just past the dir2 data header, viewed as an unused-space record. */
static struct xfs_dir2_data_unused *
xfs_dir2_data_unused_p(
	struct xfs_dir2_data_hdr	*hdr)
{
	char	*p = (char *)hdr;

	p += sizeof(struct xfs_dir2_data_hdr);
	return (struct xfs_dir2_data_unused *)p;
}
/* Address just past the dir3 data header, viewed as a data entry. */
static struct xfs_dir2_data_entry *
xfs_dir3_data_entry_p(
	struct xfs_dir2_data_hdr	*hdr)
{
	char	*p = (char *)hdr;

	p += sizeof(struct xfs_dir3_data_hdr);
	return (struct xfs_dir2_data_entry *)p;
}
/* Address just past the dir3 data header, viewed as an unused-space record. */
static struct xfs_dir2_data_unused *
xfs_dir3_data_unused_p(
	struct xfs_dir2_data_hdr	*hdr)
{
	char	*p = (char *)hdr;

	p += sizeof(struct xfs_dir3_data_hdr);
	return (struct xfs_dir2_data_unused *)p;
}
/*
* Directory Leaf block operations
*/
static int
xfs_dir2_max_leaf_ents(struct xfs_da_geometry *geo)
{
return (geo->blksize - sizeof(struct xfs_dir2_leaf_hdr)) /
(uint)sizeof(struct xfs_dir2_leaf_entry);
}
/* Start of the entry array in a dir2 leaf block. */
static struct xfs_dir2_leaf_entry *
xfs_dir2_leaf_ents_p(
	struct xfs_dir2_leaf	*lp)
{
	return &lp->__ents[0];
}
static int
xfs_dir3_max_leaf_ents(struct xfs_da_geometry *geo)
{
return (geo->blksize - sizeof(struct xfs_dir3_leaf_hdr)) /
(uint)sizeof(struct xfs_dir2_leaf_entry);
}
/*
 * Start of the entry array in a dir3 leaf block. The caller's pointer is
 * reinterpreted as the dir3 leaf layout.
 */
static struct xfs_dir2_leaf_entry *
xfs_dir3_leaf_ents_p(
	struct xfs_dir2_leaf	*lp)
{
	struct xfs_dir3_leaf	*lp3 = (struct xfs_dir3_leaf *)lp;

	return lp3->__ents;
}
static void
xfs_dir2_leaf_hdr_from_disk(
struct xfs_dir3_icleaf_hdr *to,
struct xfs_dir2_leaf *from)
{
to->forw = be32_to_cpu(from->hdr.info.forw);
to->back = be32_to_cpu(from->hdr.info.back);
to->magic = be16_to_cpu(from->hdr.info.magic);
to->count = be16_to_cpu(from->hdr.count);
to->stale = be16_to_cpu(from->hdr.stale);
ASSERT(to->magic == XFS_DIR2_LEAF1_MAGIC ||
to->magic == XFS_DIR2_LEAFN_MAGIC);
}
static void
xfs_dir2_leaf_hdr_to_disk(
struct xfs_dir2_leaf *to,
struct xfs_dir3_icleaf_hdr *from)
{
ASSERT(from->magic == XFS_DIR2_LEAF1_MAGIC ||
from->magic == XFS_DIR2_LEAFN_MAGIC);
to->hdr.info.forw = cpu_to_be32(from->forw);
to->hdr.info.back = cpu_to_be32(from->back);
to->hdr.info.magic = cpu_to_be16(from->magic);
to->hdr.count = cpu_to_be16(from->count);
to->hdr.stale = cpu_to_be16(from->stale);
}
/*
 * Decode a dir3 leaf header from its big-endian on-disk fields into the
 * CPU-endian in-core header. The block is viewed through the dir3 header
 * layout.
 */
static void
xfs_dir3_leaf_hdr_from_disk(
	struct xfs_dir3_icleaf_hdr	*to,
	struct xfs_dir2_leaf		*from)
{
	struct xfs_dir3_leaf_hdr	*dhdr;

	dhdr = (struct xfs_dir3_leaf_hdr *)from;

	to->forw = be32_to_cpu(dhdr->info.hdr.forw);
	to->back = be32_to_cpu(dhdr->info.hdr.back);
	to->magic = be16_to_cpu(dhdr->info.hdr.magic);
	to->count = be16_to_cpu(dhdr->count);
	to->stale = be16_to_cpu(dhdr->stale);

	/* Only the two dir3 leaf magic numbers are expected here. */
	ASSERT(to->magic == XFS_DIR3_LEAF1_MAGIC ||
	       to->magic == XFS_DIR3_LEAFN_MAGIC);
}
/*
 * Encode the CPU-endian in-core leaf header into the big-endian fields of a
 * dir3 leaf block. The block is viewed through the dir3 header layout.
 */
static void
xfs_dir3_leaf_hdr_to_disk(
	struct xfs_dir2_leaf		*to,
	struct xfs_dir3_icleaf_hdr	*from)
{
	struct xfs_dir3_leaf_hdr	*dhdr;

	dhdr = (struct xfs_dir3_leaf_hdr *)to;

	/* Only the two dir3 leaf magic numbers are expected here. */
	ASSERT(from->magic == XFS_DIR3_LEAF1_MAGIC ||
	       from->magic == XFS_DIR3_LEAFN_MAGIC);

	dhdr->info.hdr.forw = cpu_to_be32(from->forw);
	dhdr->info.hdr.back = cpu_to_be32(from->back);
	dhdr->info.hdr.magic = cpu_to_be16(from->magic);
	dhdr->count = cpu_to_be16(from->count);
	dhdr->stale = cpu_to_be16(from->stale);
}
/*
* Directory/Attribute Node block operations
*/
/* Start of the entry array in a non-v3 da node block. */
static struct xfs_da_node_entry *
xfs_da2_node_tree_p(
	struct xfs_da_intnode	*dap)
{
	return &dap->__btree[0];
}
/*
 * Start of the entry array in a v3 da node block. The caller's pointer is
 * reinterpreted as the v3 node layout.
 */
static struct xfs_da_node_entry *
xfs_da3_node_tree_p(
	struct xfs_da_intnode	*dap)
{
	struct xfs_da3_intnode	*dap3 = (struct xfs_da3_intnode *)dap;

	return dap3->__btree;
}
static void
xfs_da2_node_hdr_from_disk(
struct xfs_da3_icnode_hdr *to,
struct xfs_da_intnode *from)
{
ASSERT(from->hdr.info.magic == cpu_to_be16(XFS_DA_NODE_MAGIC));
to->forw = be32_to_cpu(from->hdr.info.forw);
to->back = be32_to_cpu(from->hdr.info.back);
to->magic = be16_to_cpu(from->hdr.info.magic);
to->count = be16_to_cpu(from->hdr.__count);
to->level = be16_to_cpu(from->hdr.__level);
}
static void
xfs_da2_node_hdr_to_disk(
struct xfs_da_intnode *to,
struct xfs_da3_icnode_hdr *from)
{
ASSERT(from->magic == XFS_DA_NODE_MAGIC);
to->hdr.info.forw = cpu_to_be32(from->forw);
to->hdr.info.back = cpu_to_be32(from->back);
to->hdr.info.magic = cpu_to_be16(from->magic);
to->hdr.__count = cpu_to_be16(from->count);
to->hdr.__level = cpu_to_be16(from->level);
}
/*
 * Decode a v3 da node header from its big-endian on-disk fields into the
 * CPU-endian in-core header. The block is viewed through the v3 header
 * layout.
 */
static void
xfs_da3_node_hdr_from_disk(
	struct xfs_da3_icnode_hdr	*to,
	struct xfs_da_intnode		*from)
{
	struct xfs_da3_node_hdr		*dhdr;

	dhdr = (struct xfs_da3_node_hdr *)from;

	/* The on-disk magic is checked before anything is copied. */
	ASSERT(from->hdr.info.magic == cpu_to_be16(XFS_DA3_NODE_MAGIC));

	to->forw = be32_to_cpu(dhdr->info.hdr.forw);
	to->back = be32_to_cpu(dhdr->info.hdr.back);
	to->magic = be16_to_cpu(dhdr->info.hdr.magic);
	to->count = be16_to_cpu(dhdr->__count);
	to->level = be16_to_cpu(dhdr->__level);
}
/*
 * Encode the CPU-endian in-core node header into the big-endian fields of a
 * v3 da node block. The block is viewed through the v3 header layout.
 */
static void
xfs_da3_node_hdr_to_disk(
	struct xfs_da_intnode		*to,
	struct xfs_da3_icnode_hdr	*from)
{
	struct xfs_da3_node_hdr		*dhdr;

	dhdr = (struct xfs_da3_node_hdr *)to;

	ASSERT(from->magic == XFS_DA3_NODE_MAGIC);

	dhdr->info.hdr.forw = cpu_to_be32(from->forw);
	dhdr->info.hdr.back = cpu_to_be32(from->back);
	dhdr->info.hdr.magic = cpu_to_be16(from->magic);
	dhdr->__count = cpu_to_be16(from->count);
	dhdr->__level = cpu_to_be16(from->level);
}
/*
* Directory free space block operations
*/
/*
 * How many xfs_dir2_data_off_t slots fit in a block once the dir2 free
 * header is removed.
 */
static int
xfs_dir2_free_max_bests(
	struct xfs_da_geometry	*geo)
{
	size_t	slot = sizeof(xfs_dir2_data_off_t);

	return (geo->blksize - sizeof(struct xfs_dir2_free_hdr)) / slot;
}
static __be16 *
xfs_dir2_free_bests_p(struct xfs_dir2_free *free)
{
return (__be16 *)((char *)free + sizeof(struct xfs_dir2_free_hdr));
}
/*
 * Map a data-space block number to the free-space block that covers it
 * (dir2 free header layout).
 */
static xfs_dir2_db_t
xfs_dir2_db_to_fdb(
	struct xfs_da_geometry	*geo,
	xfs_dir2_db_t		db)
{
	int	per_blk = xfs_dir2_free_max_bests(geo);

	return xfs_dir2_byte_to_db(geo, XFS_DIR2_FREE_OFFSET) + (db / per_blk);
}
/*
 * Map a data-space block number to its slot index within a free-space block
 * (dir2 free header layout).
 */
static int
xfs_dir2_db_to_fdindex(
	struct xfs_da_geometry	*geo,
	xfs_dir2_db_t		db)
{
	int	per_blk = xfs_dir2_free_max_bests(geo);

	return db % per_blk;
}
/*
 * How many xfs_dir2_data_off_t slots fit in a block once the dir3 free
 * header is removed.
 */
static int
xfs_dir3_free_max_bests(
	struct xfs_da_geometry	*geo)
{
	size_t	slot = sizeof(xfs_dir2_data_off_t);

	return (geo->blksize - sizeof(struct xfs_dir3_free_hdr)) / slot;
}
static __be16 *
xfs_dir3_free_bests_p(struct xfs_dir2_free *free)
{
return (__be16 *)((char *)free + sizeof(struct xfs_dir3_free_hdr));
}
/*
 * Map a data-space block number to the free-space block that covers it
 * (dir3 free header layout).
 */
static xfs_dir2_db_t
xfs_dir3_db_to_fdb(
	struct xfs_da_geometry	*geo,
	xfs_dir2_db_t		db)
{
	int	per_blk = xfs_dir3_free_max_bests(geo);

	return xfs_dir2_byte_to_db(geo, XFS_DIR2_FREE_OFFSET) + (db / per_blk);
}
/*
 * Map a data-space block number to its slot index within a free-space block
 * (dir3 free header layout).
 */
static int
xfs_dir3_db_to_fdindex(
	struct xfs_da_geometry	*geo,
	xfs_dir2_db_t		db)
{
	int	per_blk = xfs_dir3_free_max_bests(geo);

	return db % per_blk;
}
static void
xfs_dir2_free_hdr_from_disk(
struct xfs_dir3_icfree_hdr *to,
struct xfs_dir2_free *from)
{
to->magic = be32_to_cpu(from->hdr.magic);
to->firstdb = be32_to_cpu(from->hdr.firstdb);
to->nvalid = be32_to_cpu(from->hdr.nvalid);
to->nused = be32_to_cpu(from->hdr.nused);
ASSERT(to->magic == XFS_DIR2_FREE_MAGIC);
}
static void
xfs_dir2_free_hdr_to_disk(
struct xfs_dir2_free *to,
struct xfs_dir3_icfree_hdr *from)
{
ASSERT(from->magic == XFS_DIR2_FREE_MAGIC);
to->hdr.magic = cpu_to_be32(from->magic);
to->hdr.firstdb = cpu_to_be32(from->firstdb);
to->hdr.nvalid = cpu_to_be32(from->nvalid);
to->hdr.nused = cpu_to_be32(from->nused);
}
/*
 * Decode a dir3 free block header from its big-endian on-disk fields into
 * the CPU-endian in-core header. The block is viewed through the dir3
 * header layout.
 */
static void
xfs_dir3_free_hdr_from_disk(
	struct xfs_dir3_icfree_hdr	*to,
	struct xfs_dir2_free		*from)
{
	struct xfs_dir3_free_hdr	*dhdr;

	dhdr = (struct xfs_dir3_free_hdr *)from;

	to->magic = be32_to_cpu(dhdr->hdr.magic);
	to->firstdb = be32_to_cpu(dhdr->firstdb);
	to->nvalid = be32_to_cpu(dhdr->nvalid);
	to->nused = be32_to_cpu(dhdr->nused);

	ASSERT(to->magic == XFS_DIR3_FREE_MAGIC);
}
/*
 * Encode the CPU-endian in-core free header into the big-endian fields of a
 * dir3 free block. The block is viewed through the dir3 header layout.
 */
static void
xfs_dir3_free_hdr_to_disk(
	struct xfs_dir2_free		*to,
	struct xfs_dir3_icfree_hdr	*from)
{
	struct xfs_dir3_free_hdr	*dhdr;

	dhdr = (struct xfs_dir3_free_hdr *)to;

	ASSERT(from->magic == XFS_DIR3_FREE_MAGIC);

	dhdr->hdr.magic = cpu_to_be32(from->magic);
	dhdr->firstdb = cpu_to_be32(from->firstdb);
	dhdr->nvalid = cpu_to_be32(from->nvalid);
	dhdr->nused = cpu_to_be32(from->nused);
}
/*
 * Directory ops wired entirely to the dir2/da2 helpers: no file type byte in
 * entries and dir2-sized block headers. xfs_dir_get_ops() falls back to this
 * table when the superblock has neither the crc nor the ftype feature.
 */
static const struct xfs_dir_ops xfs_dir2_ops = {
/* shortform directory helpers */
.sf_entsize = xfs_dir2_sf_entsize,
.sf_nextentry = xfs_dir2_sf_nextentry,
.sf_get_ftype = xfs_dir2_sfe_get_ftype,
.sf_put_ftype = xfs_dir2_sfe_put_ftype,
.sf_get_ino = xfs_dir2_sfe_get_ino,
.sf_put_ino = xfs_dir2_sfe_put_ino,
.sf_get_parent_ino = xfs_dir2_sf_get_parent_ino,
.sf_put_parent_ino = xfs_dir2_sf_put_parent_ino,
/* data block helpers and precomputed offsets */
.data_entsize = xfs_dir2_data_entsize,
.data_get_ftype = xfs_dir2_data_get_ftype,
.data_put_ftype = xfs_dir2_data_put_ftype,
.data_entry_tag_p = xfs_dir2_data_entry_tag_p,
.data_bestfree_p = xfs_dir2_data_bestfree_p,
.data_dot_offset = sizeof(struct xfs_dir2_data_hdr),
.data_dotdot_offset = sizeof(struct xfs_dir2_data_hdr) +
XFS_DIR2_DATA_ENTSIZE(1),
.data_first_offset = sizeof(struct xfs_dir2_data_hdr) +
XFS_DIR2_DATA_ENTSIZE(1) +
XFS_DIR2_DATA_ENTSIZE(2),
.data_entry_offset = sizeof(struct xfs_dir2_data_hdr),
.data_dot_entry_p = xfs_dir2_data_dot_entry_p,
.data_dotdot_entry_p = xfs_dir2_data_dotdot_entry_p,
.data_first_entry_p = xfs_dir2_data_first_entry_p,
.data_entry_p = xfs_dir2_data_entry_p,
.data_unused_p = xfs_dir2_data_unused_p,
/* leaf block helpers */
.leaf_hdr_size = sizeof(struct xfs_dir2_leaf_hdr),
.leaf_hdr_to_disk = xfs_dir2_leaf_hdr_to_disk,
.leaf_hdr_from_disk = xfs_dir2_leaf_hdr_from_disk,
.leaf_max_ents = xfs_dir2_max_leaf_ents,
.leaf_ents_p = xfs_dir2_leaf_ents_p,
/* da node block helpers */
.node_hdr_size = sizeof(struct xfs_da_node_hdr),
.node_hdr_to_disk = xfs_da2_node_hdr_to_disk,
.node_hdr_from_disk = xfs_da2_node_hdr_from_disk,
.node_tree_p = xfs_da2_node_tree_p,
/* free space block helpers */
.free_hdr_size = sizeof(struct xfs_dir2_free_hdr),
.free_hdr_to_disk = xfs_dir2_free_hdr_to_disk,
.free_hdr_from_disk = xfs_dir2_free_hdr_from_disk,
.free_max_bests = xfs_dir2_free_max_bests,
.free_bests_p = xfs_dir2_free_bests_p,
.db_to_fdb = xfs_dir2_db_to_fdb,
.db_to_fdindex = xfs_dir2_db_to_fdindex,
};
/*
 * Directory ops for the ftype-without-crc case: entry-level helpers are the
 * dir3 (file type aware) ones, while block headers, leaf, node and free
 * helpers stay on the dir2/da2 variants. xfs_dir_get_ops() picks this table
 * when the superblock has the ftype feature but not crc.
 */
static const struct xfs_dir_ops xfs_dir2_ftype_ops = {
/* shortform directory helpers (ftype-aware entries) */
.sf_entsize = xfs_dir3_sf_entsize,
.sf_nextentry = xfs_dir3_sf_nextentry,
.sf_get_ftype = xfs_dir3_sfe_get_ftype,
.sf_put_ftype = xfs_dir3_sfe_put_ftype,
.sf_get_ino = xfs_dir3_sfe_get_ino,
.sf_put_ino = xfs_dir3_sfe_put_ino,
.sf_get_parent_ino = xfs_dir2_sf_get_parent_ino,
.sf_put_parent_ino = xfs_dir2_sf_put_parent_ino,
/* data block helpers: ftype-sized entries behind a dir2-sized header */
.data_entsize = xfs_dir3_data_entsize,
.data_get_ftype = xfs_dir3_data_get_ftype,
.data_put_ftype = xfs_dir3_data_put_ftype,
.data_entry_tag_p = xfs_dir3_data_entry_tag_p,
.data_bestfree_p = xfs_dir2_data_bestfree_p,
.data_dot_offset = sizeof(struct xfs_dir2_data_hdr),
.data_dotdot_offset = sizeof(struct xfs_dir2_data_hdr) +
XFS_DIR3_DATA_ENTSIZE(1),
.data_first_offset = sizeof(struct xfs_dir2_data_hdr) +
XFS_DIR3_DATA_ENTSIZE(1) +
XFS_DIR3_DATA_ENTSIZE(2),
.data_entry_offset = sizeof(struct xfs_dir2_data_hdr),
.data_dot_entry_p = xfs_dir2_data_dot_entry_p,
.data_dotdot_entry_p = xfs_dir2_ftype_data_dotdot_entry_p,
.data_first_entry_p = xfs_dir2_ftype_data_first_entry_p,
.data_entry_p = xfs_dir2_data_entry_p,
.data_unused_p = xfs_dir2_data_unused_p,
/* leaf block helpers */
.leaf_hdr_size = sizeof(struct xfs_dir2_leaf_hdr),
.leaf_hdr_to_disk = xfs_dir2_leaf_hdr_to_disk,
.leaf_hdr_from_disk = xfs_dir2_leaf_hdr_from_disk,
.leaf_max_ents = xfs_dir2_max_leaf_ents,
.leaf_ents_p = xfs_dir2_leaf_ents_p,
/* da node block helpers */
.node_hdr_size = sizeof(struct xfs_da_node_hdr),
.node_hdr_to_disk = xfs_da2_node_hdr_to_disk,
.node_hdr_from_disk = xfs_da2_node_hdr_from_disk,
.node_tree_p = xfs_da2_node_tree_p,
/* free space block helpers */
.free_hdr_size = sizeof(struct xfs_dir2_free_hdr),
.free_hdr_to_disk = xfs_dir2_free_hdr_to_disk,
.free_hdr_from_disk = xfs_dir2_free_hdr_from_disk,
.free_max_bests = xfs_dir2_free_max_bests,
.free_bests_p = xfs_dir2_free_bests_p,
.db_to_fdb = xfs_dir2_db_to_fdb,
.db_to_fdindex = xfs_dir2_db_to_fdindex,
};
/*
 * Directory ops wired to the dir3/da3 helpers: file type aware entries and
 * dir3-sized block headers. Only the parent inode accessors are shared with
 * the dir2 tables. xfs_dir_get_ops() picks this table when the superblock has
 * the crc feature.
 */
static const struct xfs_dir_ops xfs_dir3_ops = {
/* shortform directory helpers */
.sf_entsize = xfs_dir3_sf_entsize,
.sf_nextentry = xfs_dir3_sf_nextentry,
.sf_get_ftype = xfs_dir3_sfe_get_ftype,
.sf_put_ftype = xfs_dir3_sfe_put_ftype,
.sf_get_ino = xfs_dir3_sfe_get_ino,
.sf_put_ino = xfs_dir3_sfe_put_ino,
.sf_get_parent_ino = xfs_dir2_sf_get_parent_ino,
.sf_put_parent_ino = xfs_dir2_sf_put_parent_ino,
/* data block helpers and precomputed offsets */
.data_entsize = xfs_dir3_data_entsize,
.data_get_ftype = xfs_dir3_data_get_ftype,
.data_put_ftype = xfs_dir3_data_put_ftype,
.data_entry_tag_p = xfs_dir3_data_entry_tag_p,
.data_bestfree_p = xfs_dir3_data_bestfree_p,
.data_dot_offset = sizeof(struct xfs_dir3_data_hdr),
.data_dotdot_offset = sizeof(struct xfs_dir3_data_hdr) +
XFS_DIR3_DATA_ENTSIZE(1),
.data_first_offset = sizeof(struct xfs_dir3_data_hdr) +
XFS_DIR3_DATA_ENTSIZE(1) +
XFS_DIR3_DATA_ENTSIZE(2),
.data_entry_offset = sizeof(struct xfs_dir3_data_hdr),
.data_dot_entry_p = xfs_dir3_data_dot_entry_p,
.data_dotdot_entry_p = xfs_dir3_data_dotdot_entry_p,
.data_first_entry_p = xfs_dir3_data_first_entry_p,
.data_entry_p = xfs_dir3_data_entry_p,
.data_unused_p = xfs_dir3_data_unused_p,
/* leaf block helpers */
.leaf_hdr_size = sizeof(struct xfs_dir3_leaf_hdr),
.leaf_hdr_to_disk = xfs_dir3_leaf_hdr_to_disk,
.leaf_hdr_from_disk = xfs_dir3_leaf_hdr_from_disk,
.leaf_max_ents = xfs_dir3_max_leaf_ents,
.leaf_ents_p = xfs_dir3_leaf_ents_p,
/* da node block helpers */
.node_hdr_size = sizeof(struct xfs_da3_node_hdr),
.node_hdr_to_disk = xfs_da3_node_hdr_to_disk,
.node_hdr_from_disk = xfs_da3_node_hdr_from_disk,
.node_tree_p = xfs_da3_node_tree_p,
/* free space block helpers */
.free_hdr_size = sizeof(struct xfs_dir3_free_hdr),
.free_hdr_to_disk = xfs_dir3_free_hdr_to_disk,
.free_hdr_from_disk = xfs_dir3_free_hdr_from_disk,
.free_max_bests = xfs_dir3_free_max_bests,
.free_bests_p = xfs_dir3_free_bests_p,
.db_to_fdb = xfs_dir3_db_to_fdb,
.db_to_fdindex = xfs_dir3_db_to_fdindex,
};
/*
 * Ops for non-directory users: only the da node members are set (da2
 * variants); every other member is left zero-initialised.
 */
static const struct xfs_dir_ops xfs_dir2_nondir_ops = {
.node_hdr_size = sizeof(struct xfs_da_node_hdr),
.node_hdr_to_disk = xfs_da2_node_hdr_to_disk,
.node_hdr_from_disk = xfs_da2_node_hdr_from_disk,
.node_tree_p = xfs_da2_node_tree_p,
};
/*
 * Ops for non-directory users: only the da node members are set (da3
 * variants); every other member is left zero-initialised.
 */
static const struct xfs_dir_ops xfs_dir3_nondir_ops = {
.node_hdr_size = sizeof(struct xfs_da3_node_hdr),
.node_hdr_to_disk = xfs_da3_node_hdr_to_disk,
.node_hdr_from_disk = xfs_da3_node_hdr_from_disk,
.node_tree_p = xfs_da3_node_tree_p,
};
/*
 * Pick the directory ops table. In priority order: the inode's own d_ops if
 * an inode is given, then the mount's m_dir_inode_ops if set, then a table
 * chosen from the superblock feature checks (crc, then ftype, then plain
 * dir2).
 */
const struct xfs_dir_ops *
xfs_dir_get_ops(
	struct xfs_mount	*mp,
	struct xfs_inode	*dp)
{
	const struct xfs_dir_ops	*ops;

	if (dp)
		ops = dp->d_ops;
	else if (mp->m_dir_inode_ops)
		ops = mp->m_dir_inode_ops;
	else if (xfs_sb_version_hascrc(&mp->m_sb))
		ops = &xfs_dir3_ops;
	else if (xfs_sb_version_hasftype(&mp->m_sb))
		ops = &xfs_dir2_ftype_ops;
	else
		ops = &xfs_dir2_ops;

	return ops;
}
/*
 * Pick the non-directory ops table. In priority order: the inode's own d_ops
 * if an inode is given, then the mount's m_nondir_inode_ops if set, then the
 * dir3 or dir2 non-directory table depending on the superblock crc check.
 */
const struct xfs_dir_ops *
xfs_nondir_get_ops(
	struct xfs_mount	*mp,
	struct xfs_inode	*dp)
{
	const struct xfs_dir_ops	*ops;

	if (dp)
		ops = dp->d_ops;
	else if (mp->m_nondir_inode_ops)
		ops = mp->m_nondir_inode_ops;
	else if (xfs_sb_version_hascrc(&mp->m_sb))
		ops = &xfs_dir3_nondir_ops;
	else
		ops = &xfs_dir2_nondir_ops;

	return ops;
}