From 953b51b7a398b15bc172e1743181b29b5235b2df Mon Sep 17 00:00:00 2001
From: "J. Bruce Fields" <bfields@redhat.com>
Date: Wed, 13 Jun 2018 15:21:35 -0400
Subject: nfsd: fix corrupted reply to badly ordered compound

[ Upstream commit 5b7b15aee641904ae269be9846610a3950cbd64c ]

We're encoding a single op in the reply but leaving the number of ops
zero, so the reply makes no sense.

Somewhat academic as this isn't a case any real client will hit, though
in theory perhaps that could change in a future protocol extension.

Reviewed-by: Jeff Layton <jlayton@kernel.org>
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/nfsd/nfs4proc.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index bfbee8ddf978..c67064d94096 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -1632,6 +1632,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp,
 	if (status) {
 		op = &args->ops[0];
 		op->status = status;
+		resp->opcnt = 1;
 		goto encode_op;
 	}
 
-- 
cgit v1.2.3


From cd3d6463759d21f4093d3434effacc358dd0caf8 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Sat, 16 Jun 2018 15:40:48 -0400
Subject: ext4: never move the system.data xattr out of the inode body

commit 8cdb5240ec5928b20490a2bb34cb87e9a5f40226 upstream.

When expanding the extra isize space, we must never move the
system.data xattr out of the inode body.  For performance reasons, it
doesn't make any sense, and the inline data implementation assumes
that system.data xattr is never in the external xattr block.

This addresses CVE-2018-10880

https://bugzilla.kernel.org/show_bug.cgi?id=200005

Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org
Signed-off-by: Zubin Mithra <zsm@chromium.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/xattr.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'fs')

diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 9fb2a751fce4..b51bb73b06a6 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -1386,6 +1386,11 @@ retry:
 		/* Find the entry best suited to be pushed into EA block */
 		entry = NULL;
 		for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) {
+			/* never move system.data out of the inode */
+			if ((last->e_name_len == 4) &&
+			    (last->e_name_index == EXT4_XATTR_INDEX_SYSTEM) &&
+			    !memcmp(last->e_name, "data", 4))
+				continue;
 			total_size =
 			EXT4_XATTR_SIZE(le32_to_cpu(last->e_value_size)) +
 					EXT4_XATTR_LEN(last->e_name_len);
-- 
cgit v1.2.3


From 2b2ccb29f307c382ce4c19c3cf5733be2abd2dba Mon Sep 17 00:00:00 2001
From: Jon Kuhn <jkuhn@barracuda.com>
Date: Mon, 9 Jul 2018 14:33:14 +0000
Subject: fs/cifs: don't translate SFM_SLASH (U+F026) to backslash

[ Upstream commit c15e3f19a6d5c89b1209dc94b40e568177cb0921 ]

When a Mac client saves an item containing a backslash to a file server
the backslash is represented in the CIFS/SMB protocol as as U+F026.
Before this change, listing a directory containing an item with a
backslash in its name will return that item with the backslash
represented with a true backslash character (U+005C) because
convert_sfm_character mapped U+F026 to U+005C when interpretting the
CIFS/SMB protocol response.  However, attempting to open or stat the
path using a true backslash will result in an error because
convert_to_sfm_char does not map U+005C back to U+F026 causing the
CIFS/SMB request to be made with the backslash represented as U+005C.

This change simply prevents the U+F026 to U+005C conversion from
happenning.  This is analogous to how the code does not do any
translation of UNI_SLASH (U+F000).

Signed-off-by: Jon Kuhn <jkuhn@barracuda.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/cifs/cifs_unicode.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifs_unicode.c b/fs/cifs/cifs_unicode.c
index a0b3e7d1be48..211ac472cb9d 100644
--- a/fs/cifs/cifs_unicode.c
+++ b/fs/cifs/cifs_unicode.c
@@ -101,9 +101,6 @@ convert_sfm_char(const __u16 src_char, char *target)
 	case SFM_LESSTHAN:
 		*target = '<';
 		break;
-	case SFM_SLASH:
-		*target = '\\';
-		break;
 	case SFM_SPACE:
 		*target = ' ';
 		break;
-- 
cgit v1.2.3


From 8c47defad8510820d609a66d1c8d3cac64913ac5 Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Mon, 3 Sep 2018 13:15:58 +1000
Subject: fs/cifs: suppress a string overflow warning
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[ Upstream commit bcfb84a996f6fa90b5e6e2954b2accb7a4711097 ]

A powerpc build of cifs with gcc v8.2.0 produces this warning:

fs/cifs/cifssmb.c: In function ‘CIFSSMBNegotiate’:
fs/cifs/cifssmb.c:605:3: warning: ‘strncpy’ writing 16 bytes into a region of size 1 overflows the destination [-Wstringop-overflow=]
   strncpy(pSMB->DialectsArray+count, protocols[i].name, 16);
   ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

Since we are already doing a strlen() on the source, change the strncpy
to a memcpy().

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Steve French <stfrench@microsoft.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/cifs/cifssmb.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 63aea21e6298..b9b8f19dce0e 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -577,10 +577,15 @@ CIFSSMBNegotiate(const unsigned int xid, struct cifs_ses *ses)
 	}
 
 	count = 0;
+	/*
+	 * We know that all the name entries in the protocols array
+	 * are short (< 16 bytes anyway) and are NUL terminated.
+	 */
 	for (i = 0; i < CIFS_NUM_PROT; i++) {
-		strncpy(pSMB->DialectsArray+count, protocols[i].name, 16);
-		count += strlen(protocols[i].name) + 1;
-		/* null at end of source and target buffers anyway */
+		size_t len = strlen(protocols[i].name) + 1;
+
+		memcpy(pSMB->DialectsArray+count, protocols[i].name, len);
+		count += len;
 	}
 	inc_rfc1001_len(pSMB, count);
 	pSMB->ByteCount = cpu_to_le16(count);
-- 
cgit v1.2.3


From cd65a43f4dfb75a6bb5211ce28e65e5429b7c499 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Thu, 6 Sep 2018 12:47:01 +0300
Subject: cifs: read overflow in is_valid_oplock_break()

[ Upstream commit 097f5863b1a0c9901f180bbd56ae7d630655faaa ]

We need to verify that the "data_offset" is within bounds.

Reported-by: Dr Silvio Cesare of InfoSect <silvio.cesare@gmail.com>
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
Reviewed-by: Aurelien Aptel <aaptel@suse.com>
Signed-off-by: Sasha Levin <alexander.levin@microsoft.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/cifs/misc.c | 8 ++++++++
 1 file changed, 8 insertions(+)

(limited to 'fs')

diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 0cc699d9b932..61a09ab2752e 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -406,9 +406,17 @@ is_valid_oplock_break(char *buffer, struct TCP_Server_Info *srv)
 			(struct smb_com_transaction_change_notify_rsp *)buf;
 		struct file_notify_information *pnotify;
 		__u32 data_offset = 0;
+		size_t len = srv->total_read - sizeof(pSMBr->hdr.smb_buf_length);
+
 		if (get_bcc(buf) > sizeof(struct file_notify_information)) {
 			data_offset = le32_to_cpu(pSMBr->DataOffset);
 
+			if (data_offset >
+			    len - sizeof(struct file_notify_information)) {
+				cifs_dbg(FYI, "invalid data_offset %u\n",
+					 data_offset);
+				return true;
+			}
 			pnotify = (struct file_notify_information *)
 				((char *)&pSMBr->hdr.Protocol + data_offset);
 			cifs_dbg(FYI, "dnotify on %s Action: 0x%x\n",
-- 
cgit v1.2.3


From ec2a4f06e31f410e027ff10237c8c824f36fa259 Mon Sep 17 00:00:00 2001
From: Aurelien Aptel <aaptel@suse.com>
Date: Thu, 17 May 2018 16:35:07 +0200
Subject: smb2: fix missing files in root share directory listing

commit 0595751f267994c3c7027377058e4185b3a28e75 upstream.

When mounting a Windows share that is the root of a drive (eg. C$)
the server does not return . and .. directory entries. This results in
the smb2 code path erroneously skipping the 2 first entries.

Pseudo-code of the readdir() code path:

cifs_readdir(struct file, struct dir_context)
    initiate_cifs_search            <-- if no reponse cached yet
        server->ops->query_dir_first

    dir_emit_dots
        dir_emit                    <-- adds "." and ".." if we're at pos=0

    find_cifs_entry
        initiate_cifs_search        <-- if pos < start of current response
                                         (restart search)
        server->ops->query_dir_next <-- if pos > end of current response
                                         (fetch next search res)

    for(...)                        <-- loops over cur response entries
                                          starting at pos
        cifs_filldir                <-- skip . and .., emit entry
            cifs_fill_dirent
            dir_emit
	pos++

A) dir_emit_dots() always adds . & ..
   and sets the current dir pos to 2 (0 and 1 are done).

Therefore we always want the index_to_find to be 2 regardless of if
the response has . and ..

B) smb1 code initializes index_of_last_entry with a +2 offset

  in cifssmb.c CIFSFindFirst():
		psrch_inf->index_of_last_entry = 2 /* skip . and .. */ +
			psrch_inf->entries_in_buffer;

Later in find_cifs_entry() we want to find the next dir entry at pos=2
as a result of (A)

	first_entry_in_buffer = cfile->srch_inf.index_of_last_entry -
					cfile->srch_inf.entries_in_buffer;

This var is the dir pos that the first entry in the buffer will
have therefore it must be 2 in the first call.

If we don't offset index_of_last_entry by 2 (like in (B)),
first_entry_in_buffer=0 but we were instructed to get pos=2 so this
code in find_cifs_entry() skips the 2 first which is ok for non-root
shares, as it skips . and .. from the response but is not ok for root
shares where the 2 first are actual files

		pos_in_buf = index_to_find - first_entry_in_buffer;
                // pos_in_buf=2
		// we skip 2 first response entries :(
		for (i = 0; (i < (pos_in_buf)) && (cur_ent != NULL); i++) {
			/* go entry by entry figuring out which is first */
			cur_ent = nxt_dir_entry(cur_ent, end_of_smb,
						cfile->srch_inf.info_level);
		}

C) cifs_filldir() skips . and .. so we can safely ignore them for now.

Sample program:

int main(int argc, char **argv)
{
	const char *path = argc >= 2 ? argv[1] : ".";
	DIR *dh;
	struct dirent *de;

	printf("listing path <%s>\n", path);
	dh = opendir(path);
	if (!dh) {
		printf("opendir error %d\n", errno);
		return 1;
	}

	while (1) {
		de = readdir(dh);
		if (!de) {
			if (errno) {
				printf("readdir error %d\n", errno);
				return 1;
			}
			printf("end of listing\n");
			break;
		}
		printf("off=%lu <%s>\n", de->d_off, de->d_name);
	}

	return 0;
}

Before the fix with SMB1 on root shares:

<.>            off=1
<..>           off=2
<$Recycle.Bin> off=3
<bootmgr>      off=4

and on non-root shares:

<.>    off=1
<..>   off=4  <-- after adding .., the offsets jumps to +2 because
<2536> off=5       we skipped . and .. from response buffer (C)
<411>  off=6       but still incremented pos
<file> off=7
<fsx>  off=8

Therefore the fix for smb2 is to mimic smb1 behaviour and offset the
index_of_last_entry by 2.

Test results comparing smb1 and smb2 before/after the fix on root
share, non-root shares and on large directories (ie. multi-response
dir listing):

PRE FIX
=======
pre-1-root VS pre-2-root:
        ERR pre-2-root is missing [bootmgr, $Recycle.Bin]
pre-1-nonroot VS pre-2-nonroot:
        OK~ same files, same order, different offsets
pre-1-nonroot-large VS pre-2-nonroot-large:
        OK~ same files, same order, different offsets

POST FIX
========
post-1-root VS post-2-root:
        OK same files, same order, same offsets
post-1-nonroot VS post-2-nonroot:
        OK same files, same order, same offsets
post-1-nonroot-large VS post-2-nonroot-large:
        OK same files, same order, same offsets

REGRESSION?
===========
pre-1-root VS post-1-root:
        OK same files, same order, same offsets
pre-1-nonroot VS post-1-nonroot:
        OK same files, same order, same offsets

BugLink: https://bugzilla.samba.org/show_bug.cgi?id=13107
Signed-off-by: Aurelien Aptel <aaptel@suse.com>
Signed-off-by: Paulo Alcantara <palcantara@suse.deR>
Reviewed-by: Ronnie Sahlberg <lsahlber@redhat.com>
Signed-off-by: Steve French <stfrench@microsoft.com>
CC: Stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/cifs/smb2ops.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index e6b1795fbf2a..2725085a3f9f 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -914,7 +914,7 @@ smb2_query_dir_first(const unsigned int xid, struct cifs_tcon *tcon,
 	}
 
 	srch_inf->entries_in_buffer = 0;
-	srch_inf->index_of_last_entry = 0;
+	srch_inf->index_of_last_entry = 2;
 
 	rc = SMB2_query_directory(xid, tcon, fid->persistent_fid,
 				  fid->volatile_fid, 0, srch_inf);
-- 
cgit v1.2.3


From 574757073482f77ec10caea5e57726190a2837fa Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh@google.com>
Date: Fri, 5 Oct 2018 15:51:58 -0700
Subject: proc: restrict kernel stack dumps to root

commit f8a00cef17206ecd1b30d3d9f99e10d9fa707aa7 upstream.

Currently, you can use /proc/self/task/*/stack to cause a stack walk on
a task you control while it is running on another CPU.  That means that
the stack can change under the stack walker.  The stack walker does
have guards against going completely off the rails and into random
kernel memory, but it can interpret random data from your kernel stack
as instruction pointers and stack pointers.  This can cause exposure of
kernel stack contents to userspace.

Restrict the ability to inspect kernel stacks of arbitrary tasks to root
in order to prevent a local attacker from exploiting racy stack unwinding
to leak kernel task stack contents.  See the added comment for a longer
rationale.

There don't seem to be any users of this userspace API that can't
gracefully bail out if reading from the file fails.  Therefore, I believe
that this change is unlikely to break things.  In the case that this patch
does end up needing a revert, the next-best solution might be to fake a
single-entry stack based on wchan.

Link: http://lkml.kernel.org/r/20180927153316.200286-1-jannh@google.com
Fixes: 2ec220e27f50 ("proc: add /proc/*/stack")
Signed-off-by: Jann Horn <jannh@google.com>
Acked-by: Kees Cook <keescook@chromium.org>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Ken Chen <kenchen@google.com>
Cc: Will Deacon <will.deacon@arm.com>
Cc: Laura Abbott <labbott@redhat.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Josh Poimboeuf <jpoimboe@redhat.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "H . Peter Anvin" <hpa@zytor.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/proc/base.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'fs')

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 5f9cec2db6c3..4beed301e224 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -471,6 +471,20 @@ static int proc_pid_stack(struct seq_file *m, struct pid_namespace *ns,
 	int err;
 	int i;
 
+	/*
+	 * The ability to racily run the kernel stack unwinder on a running task
+	 * and then observe the unwinder output is scary; while it is useful for
+	 * debugging kernel issues, it can also allow an attacker to leak kernel
+	 * stack contents.
+	 * Doing this in a manner that is at least safe from races would require
+	 * some work to ensure that the remote task can not be scheduled; and
+	 * even then, this would still expose the unwinder as local attack
+	 * surface.
+	 * Therefore, this interface is restricted to root.
+	 */
+	if (!file_ns_capable(m->file, &init_user_ns, CAP_SYS_ADMIN))
+		return -EACCES;
+
 	entries = kmalloc(MAX_STACK_TRACE_DEPTH * sizeof(*entries), GFP_KERNEL);
 	if (!entries)
 		return -ENOMEM;
-- 
cgit v1.2.3


From 20ba8a53a1ef1cffd0f795206d69181572ae84e8 Mon Sep 17 00:00:00 2001
From: Ashish Samant <ashish.samant@oracle.com>
Date: Fri, 5 Oct 2018 15:52:15 -0700
Subject: ocfs2: fix locking for res->tracking and dlm->tracking_list

commit cbe355f57c8074bc4f452e5b6e35509044c6fa23 upstream.

In dlm_init_lockres() we access and modify res->tracking and
dlm->tracking_list without holding dlm->track_lock.  This can cause list
corruptions and can end up in kernel panic.

Fix this by locking res->tracking and dlm->tracking_list with
dlm->track_lock instead of dlm->spinlock.

Link: http://lkml.kernel.org/r/1529951192-4686-1-git-send-email-ashish.samant@oracle.com
Signed-off-by: Ashish Samant <ashish.samant@oracle.com>
Reviewed-by: Changwei Ge <ge.changwei@h3c.com>
Acked-by: Joseph Qi <jiangqi903@gmail.com>
Acked-by: Jun Piao <piaojun@huawei.com>
Cc: Mark Fasheh <mark@fasheh.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Junxiao Bi <junxiao.bi@oracle.com>
Cc: Changwei Ge <ge.changwei@h3c.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ocfs2/dlm/dlmmaster.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 4e2162b355db..0cefb036a17e 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -589,9 +589,9 @@ static void dlm_init_lockres(struct dlm_ctxt *dlm,
 
 	res->last_used = 0;
 
-	spin_lock(&dlm->spinlock);
+	spin_lock(&dlm->track_lock);
 	list_add_tail(&res->tracking, &dlm->tracking_list);
-	spin_unlock(&dlm->spinlock);
+	spin_unlock(&dlm->track_lock);
 
 	memset(res->lvb, 0, DLM_LVB_LEN);
 	memset(res->refmap, 0, sizeof(res->refmap));
-- 
cgit v1.2.3


From fb751efb29d037ee051f326b6f622881ca0b9a14 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Wed, 13 Jun 2018 00:51:28 -0400
Subject: ext4: always verify the magic number in xattr blocks

commit 513f86d73855ce556ea9522b6bfd79f87356dc3a upstream.

If there an inode points to a block which is also some other type of
metadata block (such as a block allocation bitmap), the
buffer_verified flag can be set when it was validated as that other
metadata block type; however, it would make a really terrible external
attribute block.  The reason why we use the verified flag is to avoid
constantly reverifying the block.  However, it doesn't take much
overhead to make sure the magic number of the xattr block is correct,
and this will avoid potential crashes.

This addresses CVE-2018-10879.

https://bugzilla.kernel.org/show_bug.cgi?id=200001

Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Andreas Dilger <adilger@dilger.ca>
[Backported to 4.4: adjust context]
Signed-off-by: Daniel Rosenberg <drosen@google.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/xattr.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index b51bb73b06a6..d0aaf338fa9f 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -220,12 +220,12 @@ ext4_xattr_check_block(struct inode *inode, struct buffer_head *bh)
 {
 	int error;
 
-	if (buffer_verified(bh))
-		return 0;
-
 	if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) ||
 	    BHDR(bh)->h_blocks != cpu_to_le32(1))
 		return -EFSCORRUPTED;
+	if (buffer_verified(bh))
+		return 0;
+
 	if (!ext4_xattr_block_csum_verify(inode, bh->b_blocknr, BHDR(bh)))
 		return -EFSBADCRC;
 	error = ext4_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size,
-- 
cgit v1.2.3


From c4c84454902516b7648a18d5173304da7dffa9d6 Mon Sep 17 00:00:00 2001
From: Richard Weinberger <richard@nod.at>
Date: Mon, 3 Sep 2018 23:06:23 +0200
Subject: ubifs: Check for name being NULL while mounting

commit 37f31b6ca4311b94d985fb398a72e5399ad57925 upstream.

The requested device name can be NULL or an empty string.
Check for that and refuse to continue. UBIFS has to do this manually
since we cannot use mount_bdev(), which checks for this condition.

Fixes: 1e51764a3c2ac ("UBIFS: add new flash file system")
Reported-by: syzbot+38bd0f7865e5c6379280@syzkaller.appspotmail.com
Signed-off-by: Richard Weinberger <richard@nod.at>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ubifs/super.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'fs')

diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index 0bb6de356451..7968b7a5e787 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -1918,6 +1918,9 @@ static struct ubi_volume_desc *open_ubi(const char *name, int mode)
 	int dev, vol;
 	char *endptr;
 
+	if (!name || !*name)
+		return ERR_PTR(-EINVAL);
+
 	/* First, try to open using the device node path method */
 	ubi = ubi_open_volume_path(name, mode);
 	if (!IS_ERR(ubi))
-- 
cgit v1.2.3


From 7a3d844f83d47c432fba7d28570bc68b5015ecd0 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Wed, 13 Jun 2018 00:23:11 -0400
Subject: ext4: add corruption check in ext4_xattr_set_entry()

commit 5369a762c882c0b6e9599e4ebbb3a9ba9eee7e2d upstream.

In theory this should have been caught earlier when the xattr list was
verified, but in case it got missed, it's simple enough to add check
to make sure we don't overrun the xattr buffer.

This addresses CVE-2018-10879.

https://bugzilla.kernel.org/show_bug.cgi?id=200001

Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Reviewed-by: Andreas Dilger <adilger@dilger.ca>
[bwh: Backported to 3.16:
 - Add inode parameter to ext4_xattr_set_entry() and update callers
 - Adjust context]
Signed-off-by: Ben Hutchings <ben@decadent.org.uk>
[adjusted for 4.4 context]
Signed-off-by: Daniel Rosenberg <drosen@google.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/xattr.c | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index d0aaf338fa9f..d6bae37489af 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -638,14 +638,20 @@ static size_t ext4_xattr_free_space(struct ext4_xattr_entry *last,
 }
 
 static int
-ext4_xattr_set_entry(struct ext4_xattr_info *i, struct ext4_xattr_search *s)
+ext4_xattr_set_entry(struct ext4_xattr_info *i, struct ext4_xattr_search *s,
+		     struct inode *inode)
 {
-	struct ext4_xattr_entry *last;
+	struct ext4_xattr_entry *last, *next;
 	size_t free, min_offs = s->end - s->base, name_len = strlen(i->name);
 
 	/* Compute min_offs and last. */
 	last = s->first;
-	for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) {
+	for (; !IS_LAST_ENTRY(last); last = next) {
+		next = EXT4_XATTR_NEXT(last);
+		if ((void *)next >= s->end) {
+			EXT4_ERROR_INODE(inode, "corrupted xattr entries");
+			return -EFSCORRUPTED;
+		}
 		if (!last->e_value_block && last->e_value_size) {
 			size_t offs = le16_to_cpu(last->e_value_offs);
 			if (offs < min_offs)
@@ -825,7 +831,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
 				ce = NULL;
 			}
 			ea_bdebug(bs->bh, "modifying in-place");
-			error = ext4_xattr_set_entry(i, s);
+			error = ext4_xattr_set_entry(i, s, inode);
 			if (!error) {
 				if (!IS_LAST_ENTRY(s->first))
 					ext4_xattr_rehash(header(s->base),
@@ -875,7 +881,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
 		s->end = s->base + sb->s_blocksize;
 	}
 
-	error = ext4_xattr_set_entry(i, s);
+	error = ext4_xattr_set_entry(i, s, inode);
 	if (error == -EFSCORRUPTED)
 		goto bad_block;
 	if (error)
@@ -1037,7 +1043,7 @@ int ext4_xattr_ibody_inline_set(handle_t *handle, struct inode *inode,
 
 	if (EXT4_I(inode)->i_extra_isize == 0)
 		return -ENOSPC;
-	error = ext4_xattr_set_entry(i, s);
+	error = ext4_xattr_set_entry(i, s, inode);
 	if (error) {
 		if (error == -ENOSPC &&
 		    ext4_has_inline_data(inode)) {
@@ -1049,7 +1055,7 @@ int ext4_xattr_ibody_inline_set(handle_t *handle, struct inode *inode,
 			error = ext4_xattr_ibody_find(inode, i, is);
 			if (error)
 				return error;
-			error = ext4_xattr_set_entry(i, s);
+			error = ext4_xattr_set_entry(i, s, inode);
 		}
 		if (error)
 			return error;
@@ -1075,7 +1081,7 @@ static int ext4_xattr_ibody_set(handle_t *handle, struct inode *inode,
 
 	if (EXT4_I(inode)->i_extra_isize == 0)
 		return -ENOSPC;
-	error = ext4_xattr_set_entry(i, s);
+	error = ext4_xattr_set_entry(i, s, inode);
 	if (error)
 		return error;
 	header = IHDR(inode, ext4_raw_inode(&is->iloc));
-- 
cgit v1.2.3


From 5ac147ebfa1c50cc0f25e3c7f42862cfe6b40d0d Mon Sep 17 00:00:00 2001
From: Hou Tao <houtao1@huawei.com>
Date: Fri, 12 Oct 2018 14:01:26 +0800
Subject: jffs2: return -ERANGE when xattr buffer is too small

When a file have multiple xattrs and the passed buffer is
smaller than the required size, jffs2_listxattr() should
return -ERANGE instead of continue, else Oops may occur
due to memory corruption.

Also remove the unnecessary check ("rc < 0"), because
xhandle->list(...) will not return an error number.

Spotted by generic/377 in xfstests-dev.

NB: The problem had been fixed by commit 764a5c6b1fa4 ("xattr
handlers: Simplify list operation") in v4.5-rc1, but the
modification in that commit may be too much because it modifies
all file-systems which implement xattr, so I create a single
patch for jffs2 to fix the problem.

Signed-off-by: Hou Tao <houtao1@huawei.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/jffs2/xattr.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c
index 4c2c03663533..8e1427762eeb 100644
--- a/fs/jffs2/xattr.c
+++ b/fs/jffs2/xattr.c
@@ -1004,12 +1004,14 @@ ssize_t jffs2_listxattr(struct dentry *dentry, char *buffer, size_t size)
 			rc = xhandle->list(xhandle, dentry, buffer + len,
 					   size - len, xd->xname,
 					   xd->name_len);
+			if (rc > size - len) {
+				rc = -ERANGE;
+				goto out;
+			}
 		} else {
 			rc = xhandle->list(xhandle, dentry, NULL, 0,
 					   xd->xname, xd->name_len);
 		}
-		if (rc < 0)
-			goto out;
 		len += rc;
 	}
 	rc = len;
-- 
cgit v1.2.3


From c1504091b9805d131abb36a1f01af7a5c99ac9fc Mon Sep 17 00:00:00 2001
From: Jeff Mahoney <jeffm@suse.com>
Date: Wed, 17 Aug 2016 21:58:33 -0400
Subject: btrfs: don't create or leak aliased root while cleaning up orphans

[ Upstream commit 35bbb97fc898aeb874cb7c8b746f091caa359994 ]

commit 909c3a22da3 (Btrfs: fix loading of orphan roots leading to BUG_ON)
avoids the BUG_ON but can add an aliased root to the dead_roots list or
leak the root.

Since we've already been loading roots into the radix tree, we should
use it before looking the root up on disk.

Cc: <stable@vger.kernel.org> # 4.5
Signed-off-by: Jeff Mahoney <jeffm@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Signed-off-by: Chris Mason <clm@fb.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 fs/btrfs/disk-io.c   |  4 ++--
 fs/btrfs/disk-io.h   |  2 ++
 fs/btrfs/root-tree.c | 27 ++++++++++++++++++---------
 3 files changed, 22 insertions(+), 11 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index ae6e3a30e61e..8dbb00fbb00b 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1608,8 +1608,8 @@ fail:
 	return ret;
 }
 
-static struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
-					       u64 root_id)
+struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
+					u64 root_id)
 {
 	struct btrfs_root *root;
 
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index adeb31830b9c..3c9819403487 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -68,6 +68,8 @@ struct extent_buffer *btrfs_find_tree_block(struct btrfs_fs_info *fs_info,
 struct btrfs_root *btrfs_read_fs_root(struct btrfs_root *tree_root,
 				      struct btrfs_key *location);
 int btrfs_init_fs_root(struct btrfs_root *root);
+struct btrfs_root *btrfs_lookup_fs_root(struct btrfs_fs_info *fs_info,
+					u64 root_id);
 int btrfs_insert_fs_root(struct btrfs_fs_info *fs_info,
 			 struct btrfs_root *root);
 void btrfs_free_fs_roots(struct btrfs_fs_info *fs_info);
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index 2c849b08a91b..6a6efb26d52f 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -272,6 +272,23 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root)
 		root_key.objectid = key.offset;
 		key.offset++;
 
+		/*
+		 * The root might have been inserted already, as before we look
+		 * for orphan roots, log replay might have happened, which
+		 * triggers a transaction commit and qgroup accounting, which
+		 * in turn reads and inserts fs roots while doing backref
+		 * walking.
+		 */
+		root = btrfs_lookup_fs_root(tree_root->fs_info,
+					    root_key.objectid);
+		if (root) {
+			WARN_ON(!test_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED,
+					  &root->state));
+			if (btrfs_root_refs(&root->root_item) == 0)
+				btrfs_add_dead_root(root);
+			continue;
+		}
+
 		root = btrfs_read_fs_root(tree_root, &root_key);
 		err = PTR_ERR_OR_ZERO(root);
 		if (err && err != -ENOENT) {
@@ -310,16 +327,8 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root)
 		set_bit(BTRFS_ROOT_ORPHAN_ITEM_INSERTED, &root->state);
 
 		err = btrfs_insert_fs_root(root->fs_info, root);
-		/*
-		 * The root might have been inserted already, as before we look
-		 * for orphan roots, log replay might have happened, which
-		 * triggers a transaction commit and qgroup accounting, which
-		 * in turn reads and inserts fs roots while doing backref
-		 * walking.
-		 */
-		if (err == -EEXIST)
-			err = 0;
 		if (err) {
+			BUG_ON(err == -EEXIST);
 			btrfs_free_fs_root(root);
 			break;
 		}
-- 
cgit v1.2.3


From 22979776093abc47dcbc6a06eab41d24800ec32d Mon Sep 17 00:00:00 2001
From: Mark Syms <mark.syms@citrix.com>
Date: Tue, 29 Nov 2016 11:36:46 +0000
Subject: CIFS: handle guest access errors to Windows shares

[ Upstream commit 40920c2bb119fd49ba03e2f97a172171781be442 ]

Commit 1a967d6c9b39c226be1b45f13acd4d8a5ab3dc44 ("correctly to
anonymous authentication for the NTLM(v2) authentication") introduces
a regression in handling errors related to attempting a guest
connection to a Windows share which requires authentication. This
should result in a permission denied error but actually causes the
kernel module to enter a never-ending loop trying to follow a DFS
referal which doesn't exist.

The base cause of this is the failure now occurs later in the process
during tree connect and not at the session setup setup and all errors
in tree connect are interpreted as needing to follow the DFS paths
which isn't in this case correct. So, check the returned error against
EACCES and fail if this is returned error.

Feedback from Aurelien:

  PS> net user guest /activate:no
    PS> mkdir C:\guestshare
      PS> icacls C:\guestshare /grant 'Everyone:(OI)(CI)F'
        PS> new-smbshare -name guestshare -path C:\guestshare -fullaccess Everyone

        I've tested v3.10, v4.4, master, master+your patch using default options
        (empty or no user "NU") and user=abc (U).

        NT_LOGON_FAILURE in session setup: LF
        This is what you seem to have in 3.10.

        NT_ACCESS_DENIED in tree connect to the share: AD
        This is what you get before your infinite loop.

                     |   NU       U
                     --------------------------------
                     3.10         |   LF       LF
                     4.4          |   LF       LF
                     master       |   AD       LF
                     master+patch |   AD       LF

                     No infinite DFS loop :(
                     All these issues result in mount failing very fast with permission denied.

                     I guess it could be from either the Windows version or the share/folder
                     ACL. A deeper analysis of the packets might reveal more.

                     In any case I did not notice any issues for on a basic DFS setup with
                     the patch so I don't think it introduced any regressions, which is
                     probably all that matters. It still bothers me a little I couldn't hit
                     the bug.

                     I've included kernel output w/ debugging output and network capture of
                     my tests if anyone want to have a look at it. (master+patch = ml-guestfix).

Signed-off-by: Mark Syms <mark.syms@citrix.com>
Reviewed-by: Aurelien Aptel <aaptel@suse.com>
Tested-by: Aurelien Aptel <aaptel@suse.com>
Acked-by: Pavel Shilovsky <pshilov@microsoft.com>
Signed-off-by: Steve French <smfrench@gmail.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 fs/cifs/connect.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'fs')

diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 077ad3a06c9a..1eeb4780c3ed 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -3674,6 +3674,9 @@ try_mount_again:
 	if (IS_ERR(tcon)) {
 		rc = PTR_ERR(tcon);
 		tcon = NULL;
+		if (rc == -EACCES)
+			goto mount_fail_check;
+
 		goto remote_path_check;
 	}
 
-- 
cgit v1.2.3


From 1b6a863ff29ccd7b65b0112bbec1cda26b7f9c4e Mon Sep 17 00:00:00 2001
From: Ashish Samant <ashish.samant@oracle.com>
Date: Wed, 12 Jul 2017 19:26:58 -0700
Subject: fuse: Dont call set_page_dirty_lock() for ITER_BVEC pages for
 async_dio

[ Upstream commit 61c12b49e1c9c77d7a1bcc161de540d0fd21cf0c ]

Commit 8fba54aebbdf ("fuse: direct-io: don't dirty ITER_BVEC pages") fixes
the ITER_BVEC page deadlock for direct io in fuse by checking in
fuse_direct_io(), whether the page is a bvec page or not, before locking
it.  However, this check is missed when the "async_dio" mount option is
enabled.  In this case, set_page_dirty_lock() is called from the req->end
callback in request_end(), when the fuse thread is returning from userspace
to respond to the read request.  This will cause the same deadlock because
the bvec condition is not checked in this path.

Here is the stack of the deadlocked thread, while returning from userspace:

[13706.656686] INFO: task glusterfs:3006 blocked for more than 120 seconds.
[13706.657808] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables
this message.
[13706.658788] glusterfs       D ffffffff816c80f0     0  3006      1
0x00000080
[13706.658797]  ffff8800d6713a58 0000000000000086 ffff8800d9ad7000
ffff8800d9ad5400
[13706.658799]  ffff88011ffd5cc0 ffff8800d6710008 ffff88011fd176c0
7fffffffffffffff
[13706.658801]  0000000000000002 ffffffff816c80f0 ffff8800d6713a78
ffffffff816c790e
[13706.658803] Call Trace:
[13706.658809]  [<ffffffff816c80f0>] ? bit_wait_io_timeout+0x80/0x80
[13706.658811]  [<ffffffff816c790e>] schedule+0x3e/0x90
[13706.658813]  [<ffffffff816ca7e5>] schedule_timeout+0x1b5/0x210
[13706.658816]  [<ffffffff81073ffb>] ? gup_pud_range+0x1db/0x1f0
[13706.658817]  [<ffffffff810668fe>] ? kvm_clock_read+0x1e/0x20
[13706.658819]  [<ffffffff81066909>] ? kvm_clock_get_cycles+0x9/0x10
[13706.658822]  [<ffffffff810f5792>] ? ktime_get+0x52/0xc0
[13706.658824]  [<ffffffff816c6f04>] io_schedule_timeout+0xa4/0x110
[13706.658826]  [<ffffffff816c8126>] bit_wait_io+0x36/0x50
[13706.658828]  [<ffffffff816c7d06>] __wait_on_bit_lock+0x76/0xb0
[13706.658831]  [<ffffffffa0545636>] ? lock_request+0x46/0x70 [fuse]
[13706.658834]  [<ffffffff8118800a>] __lock_page+0xaa/0xb0
[13706.658836]  [<ffffffff810c8500>] ? wake_atomic_t_function+0x40/0x40
[13706.658838]  [<ffffffff81194d08>] set_page_dirty_lock+0x58/0x60
[13706.658841]  [<ffffffffa054d968>] fuse_release_user_pages+0x58/0x70 [fuse]
[13706.658844]  [<ffffffffa0551430>] ? fuse_aio_complete+0x190/0x190 [fuse]
[13706.658847]  [<ffffffffa0551459>] fuse_aio_complete_req+0x29/0x90 [fuse]
[13706.658849]  [<ffffffffa05471e9>] request_end+0xd9/0x190 [fuse]
[13706.658852]  [<ffffffffa0549126>] fuse_dev_do_write+0x336/0x490 [fuse]
[13706.658854]  [<ffffffffa054963e>] fuse_dev_write+0x6e/0xa0 [fuse]
[13706.658857]  [<ffffffff812a9ef3>] ? security_file_permission+0x23/0x90
[13706.658859]  [<ffffffff81205300>] do_iter_readv_writev+0x60/0x90
[13706.658862]  [<ffffffffa05495d0>] ? fuse_dev_splice_write+0x350/0x350
[fuse]
[13706.658863]  [<ffffffff812062a1>] do_readv_writev+0x171/0x1f0
[13706.658866]  [<ffffffff810b3d00>] ? try_to_wake_up+0x210/0x210
[13706.658868]  [<ffffffff81206361>] vfs_writev+0x41/0x50
[13706.658870]  [<ffffffff81206496>] SyS_writev+0x56/0xf0
[13706.658872]  [<ffffffff810257a1>] ? syscall_trace_leave+0xf1/0x160
[13706.658874]  [<ffffffff816cbb2e>] system_call_fastpath+0x12/0x71

Fix this by making should_dirty a fuse_io_priv parameter that can be
checked in fuse_aio_complete_req().

Reported-by: Tiger Yang <tiger.yang@oracle.com>
Signed-off-by: Ashish Samant <ashish.samant@oracle.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 fs/fuse/file.c   | 6 +++---
 fs/fuse/fuse_i.h | 1 +
 2 files changed, 4 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 8577f3ba6dc6..7014318f6d18 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -625,7 +625,7 @@ static void fuse_aio_complete_req(struct fuse_conn *fc, struct fuse_req *req)
 	struct fuse_io_priv *io = req->io;
 	ssize_t pos = -1;
 
-	fuse_release_user_pages(req, !io->write);
+	fuse_release_user_pages(req, io->should_dirty);
 
 	if (io->write) {
 		if (req->misc.write.in.size != req->misc.write.out.size)
@@ -1333,7 +1333,6 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
 		       loff_t *ppos, int flags)
 {
 	int write = flags & FUSE_DIO_WRITE;
-	bool should_dirty = !write && iter_is_iovec(iter);
 	int cuse = flags & FUSE_DIO_CUSE;
 	struct file *file = io->file;
 	struct inode *inode = file->f_mapping->host;
@@ -1362,6 +1361,7 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
 			mutex_unlock(&inode->i_mutex);
 	}
 
+	io->should_dirty = !write && iter_is_iovec(iter);
 	while (count) {
 		size_t nres;
 		fl_owner_t owner = current->files;
@@ -1378,7 +1378,7 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
 			nres = fuse_send_read(req, io, pos, nbytes, owner);
 
 		if (!io->async)
-			fuse_release_user_pages(req, should_dirty);
+			fuse_release_user_pages(req, io->should_dirty);
 		if (req->out.h.error) {
 			if (!res)
 				res = req->out.h.error;
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index 7aafe9acc6c0..c6eb35a95fcc 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -252,6 +252,7 @@ struct fuse_io_priv {
 	size_t size;
 	__u64 offset;
 	bool write;
+	bool should_dirty;
 	int err;
 	struct kiocb *iocb;
 	struct file *file;
-- 
cgit v1.2.3


From fa2075baa95c1957b477c830415d4d6b72282ddc Mon Sep 17 00:00:00 2001
From: Khazhismel Kumykov <khazhy@google.com>
Date: Fri, 12 Oct 2018 21:34:40 -0700
Subject: fs/fat/fatent.c: add cond_resched() to fat_count_free_clusters()

[ Upstream commit ac081c3be3fae6d0cc3e1862507fca3862d30b67 ]

On non-preempt kernels this loop can take a long time (more than 50 ticks)
processing through entries.

Link: http://lkml.kernel.org/r/20181010172623.57033-1-khazhy@google.com
Signed-off-by: Khazhismel Kumykov <khazhy@google.com>
Acked-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 fs/fat/fatent.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/fat/fatent.c b/fs/fat/fatent.c
index a70e37c47a78..e3fc477728b3 100644
--- a/fs/fat/fatent.c
+++ b/fs/fat/fatent.c
@@ -681,6 +681,7 @@ int fat_count_free_clusters(struct super_block *sb)
 			if (ops->ent_get(&fatent) == FAT_ENT_FREE)
 				free++;
 		} while (fat_ent_next(sbi, &fatent));
+		cond_resched();
 	}
 	sbi->free_clusters = free;
 	sbi->free_clus_valid = 1;
-- 
cgit v1.2.3


From d1ce094c3c90434844c6580be7290517762422d6 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 17 Oct 2018 15:23:26 +0100
Subject: cachefiles: fix the race between cachefiles_bury_object() and
 rmdir(2)

commit 169b803397499be85bdd1e3d07d6f5e3d4bd669e upstream.

the victim might've been rmdir'ed just before the lock_rename();
unlike the normal callers, we do not look the source up after the
parents are locked - we know it beforehand and just recheck that it's
still the child of what used to be its parent.  Unfortunately,
the check is too weak - we don't spot a dead directory since its
->d_parent is unchanged, dentry is positive, etc.  So we sail all
the way to ->rename(), with hosting filesystems _not_ expecting
to be asked renaming an rmdir'ed subdirectory.

The fix is easy, fortunately - the lock on parent is sufficient for
making IS_DEADDIR() on child safe.

Cc: stable@vger.kernel.org
Fixes: 9ae326a69004 (CacheFiles: A cache that backs onto a mounted filesystem)
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/cachefiles/namei.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index c43b4b08546b..a5f59eed8287 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -317,7 +317,7 @@ try_again:
 	trap = lock_rename(cache->graveyard, dir);
 
 	/* do some checks before getting the grave dentry */
-	if (rep->d_parent != dir) {
+	if (rep->d_parent != dir || IS_DEADDIR(d_inode(rep))) {
 		/* the entry was probably culled when we dropped the parent dir
 		 * lock */
 		unlock_rename(cache->graveyard, dir);
-- 
cgit v1.2.3


From 85b89ccf86cececb3e562b071d078a3cf50a54cc Mon Sep 17 00:00:00 2001
From: Hou Tao <houtao1@huawei.com>
Date: Sat, 6 Oct 2018 17:09:35 +0800
Subject: jffs2: free jffs2_sb_info through jffs2_kill_sb()

commit 92e2921f7eee63450a5f953f4b15dc6210219430 upstream.

When an invalid mount option is passed to jffs2, jffs2_parse_options()
will fail and jffs2_sb_info will be freed, but then jffs2_sb_info will
be used (use-after-free) and freeed (double-free) in jffs2_kill_sb().

Fix it by removing the buggy invocation of kfree() when getting invalid
mount options.

Fixes: 92abc475d8de ("jffs2: implement mount option parsing and compression overriding")
Cc: stable@kernel.org
Signed-off-by: Hou Tao <houtao1@huawei.com>
Reviewed-by: Richard Weinberger <richard@nod.at>
Signed-off-by: Boris Brezillon <boris.brezillon@bootlin.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/jffs2/super.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index 600da1a4df29..1544f530ccd0 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -285,10 +285,8 @@ static int jffs2_fill_super(struct super_block *sb, void *data, int silent)
 	sb->s_fs_info = c;
 
 	ret = jffs2_parse_options(c, data);
-	if (ret) {
-		kfree(c);
+	if (ret)
 		return -EINVAL;
-	}
 
 	/* Initialize JFFS2 superblock locks, the further initialization will
 	 * be done later */
-- 
cgit v1.2.3


From c87fb36e0dfd35c3c2c454a3f8c6d7018fcde400 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Tue, 2 Oct 2018 01:34:44 -0400
Subject: ext4: fix argument checking in EXT4_IOC_MOVE_EXT

[ Upstream commit f18b2b83a727a3db208308057d2c7945f368e625 ]

If the starting block number of either the source or destination file
exceeds the EOF, EXT4_IOC_MOVE_EXT should return EINVAL.

Also fixed the helper function mext_check_coverage() so that if the
logical block is beyond EOF, make it return immediately, instead of
looping until the block number wraps all the away around.  This takes
long enough that if there are multiple threads trying to do pound on
an the same inode doing non-sensical things, it can end up triggering
the kernel's soft lockup detector.

Reported-by: syzbot+c61979f6f2cba5cb3c06@syzkaller.appspotmail.com
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org
Signed-off-by: Sasha Levin <sashal@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/move_extent.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 05048fcfd602..6b5e2eddd8d7 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -526,9 +526,13 @@ mext_check_arguments(struct inode *orig_inode,
 			orig_inode->i_ino, donor_inode->i_ino);
 		return -EINVAL;
 	}
-	if (orig_eof < orig_start + *len - 1)
+	if (orig_eof <= orig_start)
+		*len = 0;
+	else if (orig_eof < orig_start + *len - 1)
 		*len = orig_eof - orig_start;
-	if (donor_eof < donor_start + *len - 1)
+	if (donor_eof <= donor_start)
+		*len = 0;
+	else if (donor_eof < donor_start + *len - 1)
 		*len = donor_eof - donor_start;
 	if (!*len) {
 		ext4_debug("ext4 move extent: len should not be 0 "
-- 
cgit v1.2.3


From f4af4c7329886024c122cdd88559db4d5b93dc81 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Fri, 5 Oct 2018 18:44:40 -0400
Subject: jbd2: fix use after free in jbd2_log_do_checkpoint()

commit ccd3c4373eacb044eb3832966299d13d2631f66f upstream.

The code cleaning transaction's lists of checkpoint buffers has a bug
where it increases bh refcount only after releasing
journal->j_list_lock. Thus the following race is possible:

CPU0					CPU1
jbd2_log_do_checkpoint()
					jbd2_journal_try_to_free_buffers()
					  __journal_try_to_free_buffer(bh)
  ...
  while (transaction->t_checkpoint_io_list)
  ...
    if (buffer_locked(bh)) {

<-- IO completes now, buffer gets unlocked -->

      spin_unlock(&journal->j_list_lock);
					    spin_lock(&journal->j_list_lock);
					    __jbd2_journal_remove_checkpoint(jh);
					    spin_unlock(&journal->j_list_lock);
					  try_to_free_buffers(page);
      get_bh(bh) <-- accesses freed bh

Fix the problem by grabbing bh reference before unlocking
journal->j_list_lock.

Fixes: dc6e8d669cf5 ("jbd2: don't call get_bh() before calling __jbd2_journal_remove_checkpoint()")
Fixes: be1158cc615f ("jbd2: fold __process_buffer() into jbd2_log_do_checkpoint()")
Reported-by: syzbot+7f4a27091759e2fe7453@syzkaller.appspotmail.com
CC: stable@vger.kernel.org
Reviewed-by: Lukas Czerner <lczerner@redhat.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/jbd2/checkpoint.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 684996c8a3a4..4d5a5a4cc017 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -254,8 +254,8 @@ restart:
 		bh = jh2bh(jh);
 
 		if (buffer_locked(bh)) {
-			spin_unlock(&journal->j_list_lock);
 			get_bh(bh);
+			spin_unlock(&journal->j_list_lock);
 			wait_on_buffer(bh);
 			/* the journal_head may have gone by now */
 			BUFFER_TRACE(bh, "brelse");
@@ -336,8 +336,8 @@ restart2:
 		jh = transaction->t_checkpoint_io_list;
 		bh = jh2bh(jh);
 		if (buffer_locked(bh)) {
-			spin_unlock(&journal->j_list_lock);
 			get_bh(bh);
+			spin_unlock(&journal->j_list_lock);
 			wait_on_buffer(bh);
 			/* the journal_head may have gone by now */
 			BUFFER_TRACE(bh, "brelse");
-- 
cgit v1.2.3


From 82a76725f960854d275991bb0c1f8adb44b3c1eb Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 13 Oct 2018 00:19:13 -0400
Subject: gfs2_meta: ->mount() can get NULL dev_name

commit 3df629d873f8683af6f0d34dfc743f637966d483 upstream.

get in sync with mount_bdev() handling of the same

Reported-by: syzbot+c54f8e94e6bba03b04e9@syzkaller.appspotmail.com
Cc: stable@vger.kernel.org
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/gfs2/ops_fstype.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'fs')

diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index baab99b69d8a..d9178388cf48 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -1353,6 +1353,9 @@ static struct dentry *gfs2_mount_meta(struct file_system_type *fs_type,
 	struct path path;
 	int error;
 
+	if (!dev_name || !*dev_name)
+		return ERR_PTR(-EINVAL);
+
 	error = kern_path(dev_name, LOOKUP_FOLLOW, &path);
 	if (error) {
 		pr_warn("path_lookup on %s returned error %d\n",
-- 
cgit v1.2.3


From d396e5395271f59bbe9069ebcc14a0fa5d9c85e7 Mon Sep 17 00:00:00 2001
From: Lukas Czerner <lczerner@redhat.com>
Date: Tue, 2 Oct 2018 21:18:45 -0400
Subject: ext4: initialize retries variable in
 ext4_da_write_inline_data_begin()

commit 625ef8a3acd111d5f496d190baf99d1a815bd03e upstream.

Variable retries is not initialized in ext4_da_write_inline_data_begin()
which can lead to nondeterministic number of retries in case we hit
ENOSPC. Initialize retries to zero as we do everywhere else.

Signed-off-by: Lukas Czerner <lczerner@redhat.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Fixes: bc0ca9df3b2a ("ext4: retry allocation when inline->extent conversion failed")
Cc: stable@kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/inline.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index 1e7a9774119c..34c365f0e69e 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -859,7 +859,7 @@ int ext4_da_write_inline_data_begin(struct address_space *mapping,
 	handle_t *handle;
 	struct page *page;
 	struct ext4_iloc iloc;
-	int retries;
+	int retries = 0;
 
 	ret = ext4_get_inode_loc(inode, &iloc);
 	if (ret)
-- 
cgit v1.2.3


From 39d6c4cdcf67e3b7f91822d138b7d7f870c295be Mon Sep 17 00:00:00 2001
From: Steve French <stfrench@microsoft.com>
Date: Sat, 15 Sep 2018 23:04:41 -0500
Subject: smb3: allow stats which track session and share reconnects to be
 reset

commit 2c887635cd6ab3af619dc2be94e5bf8f2e172b78 upstream.

Currently, "echo 0 > /proc/fs/cifs/Stats" resets all of the stats
except the session and share reconnect counts.  Fix it to
reset those as well.

CC: Stable <stable@vger.kernel.org>
Signed-off-by: Steve French <stfrench@microsoft.com>
Reviewed-by: Aurelien Aptel <aaptel@suse.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/cifs/cifs_debug.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'fs')

diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 0e72a14228f8..7bc6d27d47a4 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -285,6 +285,9 @@ static ssize_t cifs_stats_proc_write(struct file *file,
 		atomic_set(&totBufAllocCount, 0);
 		atomic_set(&totSmBufAllocCount, 0);
 #endif /* CONFIG_CIFS_STATS2 */
+		atomic_set(&tcpSesReconnectCount, 0);
+		atomic_set(&tconInfoReconnectCount, 0);
+
 		spin_lock(&GlobalMid_Lock);
 		GlobalMaxActiveXid = 0;
 		GlobalCurrentXid = 0;
-- 
cgit v1.2.3


From ae83508da422b1c6ec5a1409ca15c0f5f33cff31 Mon Sep 17 00:00:00 2001
From: Steve French <stfrench@microsoft.com>
Date: Fri, 19 Oct 2018 00:45:21 -0500
Subject: smb3: do not attempt cifs operation in smb3 query info error path

commit 1e77a8c204c9d1b655c61751b8ad0fde22421dbb upstream.

If backupuid mount option is sent, we can incorrectly retry
(on access denied on query info) with a cifs (FindFirst) operation
on an smb3 mount which causes the server to force the session close.

We set backup intent on open so no need for this fallback.

See kernel bugzilla 201435

Signed-off-by: Steve French <stfrench@microsoft.com>
CC: Stable <stable@vger.kernel.org>
Reviewed-by: Ronnie Sahlberg <lsahlber@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/cifs/inode.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 36c8594bb147..5c3187df9ab9 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -756,7 +756,15 @@ cifs_get_inode_info(struct inode **inode, const char *full_path,
 	} else if (rc == -EREMOTE) {
 		cifs_create_dfs_fattr(&fattr, sb);
 		rc = 0;
-	} else if (rc == -EACCES && backup_cred(cifs_sb)) {
+	} else if ((rc == -EACCES) && backup_cred(cifs_sb) &&
+		   (strcmp(server->vals->version_string, SMB1_VERSION_STRING)
+		      == 0)) {
+			/*
+			 * For SMB2 and later the backup intent flag is already
+			 * sent if needed on open and there is no path based
+			 * FindFirst operation to use to retry with
+			 */
+
 			srchinf = kzalloc(sizeof(struct cifs_search_info),
 						GFP_KERNEL);
 			if (srchinf == NULL) {
-- 
cgit v1.2.3


From aa21d67d7fe74897df53cbd22c8be21f8928e07d Mon Sep 17 00:00:00 2001
From: Steve French <stfrench@microsoft.com>
Date: Sun, 28 Oct 2018 13:13:23 -0500
Subject: smb3: on kerberos mount if server doesn't specify auth type use krb5

commit 926674de6705f0f1dbf29a62fd758d0977f535d6 upstream.

Some servers (e.g. Azure) do not include a spnego blob in the SMB3
negotiate protocol response, so on kerberos mounts ("sec=krb5")
we can fail, as we expected the server to list its supported
auth types (OIDs in the spnego blob in the negprot response).
Change this so that on krb5 mounts we default to trying krb5 if the
server doesn't list its supported protocol mechanisms.

Signed-off-by: Steve French <stfrench@microsoft.com>
Reviewed-by: Ronnie Sahlberg <lsahlber@redhat.com>
CC: Stable <stable@vger.kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/cifs/cifs_spnego.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/cifs_spnego.c b/fs/cifs/cifs_spnego.c
index 6908080e9b6d..e3f2b7370bd8 100644
--- a/fs/cifs/cifs_spnego.c
+++ b/fs/cifs/cifs_spnego.c
@@ -143,8 +143,10 @@ cifs_get_spnego_key(struct cifs_ses *sesInfo)
 		sprintf(dp, ";sec=krb5");
 	else if (server->sec_mskerberos)
 		sprintf(dp, ";sec=mskrb5");
-	else
-		goto out;
+	else {
+		cifs_dbg(VFS, "unknown or missing server auth type, use krb5\n");
+		sprintf(dp, ";sec=krb5");
+	}
 
 	dp = description + strlen(description);
 	sprintf(dp, ";uid=0x%x",
-- 
cgit v1.2.3


From a88fd5847b939a87f4814b41814c74ec1d2a5309 Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Tue, 18 Sep 2018 10:07:44 -0400
Subject: NFSv4.1: Fix the r/wsize checking

commit 943cff67b842839f4f35364ba2db5c2d3f025d94 upstream.

The intention of nfs4_session_set_rwsize() was to cap the r/wsize to the
buffer sizes negotiated by the CREATE_SESSION. The initial code had a
bug whereby we would not check the values negotiated by nfs_probe_fsinfo()
(the assumption being that CREATE_SESSION will always negotiate buffer values
that are sane w.r.t. the server's preferred r/wsizes) but would only check
values set by the user in the 'mount' command.

The code was changed in 4.11 to _always_ set the r/wsize, meaning that we
now never use the server preferred r/wsizes. This is the regression that
this patch fixes.
Also rename the function to nfs4_session_limit_rwsize() in order to avoid
future confusion.

Fixes: 033853325fe3 (NFSv4.1 respect server's max size in CREATE_SESSION")
Cc: stable@vger.kernel.org # v4.11+
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/nfs/nfs4client.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 63498e1a542a..ae91d1e450be 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -879,10 +879,10 @@ EXPORT_SYMBOL_GPL(nfs4_set_ds_client);
 
 /*
  * Session has been established, and the client marked ready.
- * Set the mount rsize and wsize with negotiated fore channel
- * attributes which will be bound checked in nfs_server_set_fsinfo.
+ * Limit the mount rsize, wsize and dtsize using negotiated fore
+ * channel attributes.
  */
-static void nfs4_session_set_rwsize(struct nfs_server *server)
+static void nfs4_session_limit_rwsize(struct nfs_server *server)
 {
 #ifdef CONFIG_NFS_V4_1
 	struct nfs4_session *sess;
@@ -895,9 +895,11 @@ static void nfs4_session_set_rwsize(struct nfs_server *server)
 	server_resp_sz = sess->fc_attrs.max_resp_sz - nfs41_maxread_overhead;
 	server_rqst_sz = sess->fc_attrs.max_rqst_sz - nfs41_maxwrite_overhead;
 
-	if (!server->rsize || server->rsize > server_resp_sz)
+	if (server->dtsize > server_resp_sz)
+		server->dtsize = server_resp_sz;
+	if (server->rsize > server_resp_sz)
 		server->rsize = server_resp_sz;
-	if (!server->wsize || server->wsize > server_rqst_sz)
+	if (server->wsize > server_rqst_sz)
 		server->wsize = server_rqst_sz;
 #endif /* CONFIG_NFS_V4_1 */
 }
@@ -944,12 +946,12 @@ static int nfs4_server_common_setup(struct nfs_server *server,
 			(unsigned long long) server->fsid.minor);
 	nfs_display_fhandle(mntfh, "Pseudo-fs root FH");
 
-	nfs4_session_set_rwsize(server);
-
 	error = nfs_probe_fsinfo(server, mntfh, fattr);
 	if (error < 0)
 		goto out;
 
+	nfs4_session_limit_rwsize(server);
+
 	if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN)
 		server->namelen = NFS4_MAXNAMLEN;
 
-- 
cgit v1.2.3


From 637276555f3a9c35dcbe136d1bc108f5f708d500 Mon Sep 17 00:00:00 2001
From: Amir Goldstein <amir73il@gmail.com>
Date: Fri, 28 Sep 2018 20:41:48 +0300
Subject: lockd: fix access beyond unterminated strings in prints

commit 93f38b6fae0ea8987e22d9e6c38f8dfdccd867ee upstream.

printk format used %*s instead of %.*s, so hostname_len does not limit
the number of bytes accessed from hostname.

Signed-off-by: Amir Goldstein <amir73il@gmail.com>
Cc: stable@vger.kernel.org
Signed-off-by: J. Bruce Fields <bfields@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/lockd/host.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index d716c9993a26..c7eb47f2fb6c 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -340,7 +340,7 @@ struct nlm_host *nlmsvc_lookup_host(const struct svc_rqst *rqstp,
 	};
 	struct lockd_net *ln = net_generic(net, lockd_net_id);
 
-	dprintk("lockd: %s(host='%*s', vers=%u, proto=%s)\n", __func__,
+	dprintk("lockd: %s(host='%.*s', vers=%u, proto=%s)\n", __func__,
 			(int)hostname_len, hostname, rqstp->rq_vers,
 			(rqstp->rq_prot == IPPROTO_UDP ? "udp" : "tcp"));
 
-- 
cgit v1.2.3


From 4492b0b5f0a46859b12d67126fd2f68ae2674936 Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Sat, 16 Jun 2018 23:41:59 -0400
Subject: ext4: avoid running out of journal credits when appending to an
 inline file

commit 8bc1379b82b8e809eef77a9fedbb75c6c297be19 upstream.

Use a separate journal transaction if it turns out that we need to
convert an inline file to use an data block.  Otherwise we could end
up failing due to not having journal credits.

This addresses CVE-2018-10883.

https://bugzilla.kernel.org/show_bug.cgi?id=200071

Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org
[fengc@google.com: 4.4 backport: adjust context]
Signed-off-by: Chenbo Feng <fengc@google.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/ext4.h   |  3 ---
 fs/ext4/inline.c | 38 +-------------------------------------
 fs/ext4/xattr.c  | 18 ++----------------
 3 files changed, 3 insertions(+), 56 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index f5d9f82b173a..b6e25d771eea 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -3039,9 +3039,6 @@ extern struct buffer_head *ext4_get_first_inline_block(struct inode *inode,
 extern int ext4_inline_data_fiemap(struct inode *inode,
 				   struct fiemap_extent_info *fieinfo,
 				   int *has_inline, __u64 start, __u64 len);
-extern int ext4_try_to_evict_inline_data(handle_t *handle,
-					 struct inode *inode,
-					 int needed);
 extern void ext4_inline_data_truncate(struct inode *inode, int *has_inline);
 
 extern int ext4_convert_inline_data(struct inode *inode);
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index 34c365f0e69e..1aec46733ef8 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -888,11 +888,11 @@ retry_journal:
 	flags |= AOP_FLAG_NOFS;
 
 	if (ret == -ENOSPC) {
+		ext4_journal_stop(handle);
 		ret = ext4_da_convert_inline_data_to_extent(mapping,
 							    inode,
 							    flags,
 							    fsdata);
-		ext4_journal_stop(handle);
 		if (ret == -ENOSPC &&
 		    ext4_should_retry_alloc(inode->i_sb, &retries))
 			goto retry_journal;
@@ -1867,42 +1867,6 @@ out:
 	return (error < 0 ? error : 0);
 }
 
-/*
- * Called during xattr set, and if we can sparse space 'needed',
- * just create the extent tree evict the data to the outer block.
- *
- * We use jbd2 instead of page cache to move data to the 1st block
- * so that the whole transaction can be committed as a whole and
- * the data isn't lost because of the delayed page cache write.
- */
-int ext4_try_to_evict_inline_data(handle_t *handle,
-				  struct inode *inode,
-				  int needed)
-{
-	int error;
-	struct ext4_xattr_entry *entry;
-	struct ext4_inode *raw_inode;
-	struct ext4_iloc iloc;
-
-	error = ext4_get_inode_loc(inode, &iloc);
-	if (error)
-		return error;
-
-	raw_inode = ext4_raw_inode(&iloc);
-	entry = (struct ext4_xattr_entry *)((void *)raw_inode +
-					    EXT4_I(inode)->i_inline_off);
-	if (EXT4_XATTR_LEN(entry->e_name_len) +
-	    EXT4_XATTR_SIZE(le32_to_cpu(entry->e_value_size)) < needed) {
-		error = -ENOSPC;
-		goto out;
-	}
-
-	error = ext4_convert_inline_data_nolock(handle, inode, &iloc);
-out:
-	brelse(iloc.bh);
-	return error;
-}
-
 void ext4_inline_data_truncate(struct inode *inode, int *has_inline)
 {
 	handle_t *handle;
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index d6bae37489af..16cf24dc9c34 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -1044,22 +1044,8 @@ int ext4_xattr_ibody_inline_set(handle_t *handle, struct inode *inode,
 	if (EXT4_I(inode)->i_extra_isize == 0)
 		return -ENOSPC;
 	error = ext4_xattr_set_entry(i, s, inode);
-	if (error) {
-		if (error == -ENOSPC &&
-		    ext4_has_inline_data(inode)) {
-			error = ext4_try_to_evict_inline_data(handle, inode,
-					EXT4_XATTR_LEN(strlen(i->name) +
-					EXT4_XATTR_SIZE(i->value_len)));
-			if (error)
-				return error;
-			error = ext4_xattr_ibody_find(inode, i, is);
-			if (error)
-				return error;
-			error = ext4_xattr_set_entry(i, s, inode);
-		}
-		if (error)
-			return error;
-	}
+	if (error)
+		return error;
 	header = IHDR(inode, ext4_raw_inode(&is->iloc));
 	if (!IS_LAST_ENTRY(s->first)) {
 		header->h_magic = cpu_to_le32(EXT4_XATTR_MAGIC);
-- 
cgit v1.2.3


From ae6e27402c216840e2d361675a684803b319aa27 Mon Sep 17 00:00:00 2001
From: Nicolas Pitre <nicolas.pitre@linaro.org>
Date: Tue, 30 Oct 2018 13:26:15 -0400
Subject: Cramfs: fix abad comparison when wrap-arounds occur

commit 672ca9dd13f1aca0c17516f76fc5b0e8344b3e46 upstream.

It is possible for corrupted filesystem images to produce very large
block offsets that may wrap when a length is added, and wrongly pass
the buffer size test.

Reported-by: Anatoly Trosinenko <anatoly.trosinenko@gmail.com>
Signed-off-by: Nicolas Pitre <nico@linaro.org>
Cc: stable@vger.kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/cramfs/inode.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index 355c522f3585..a6c9c2d66af1 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -185,7 +185,8 @@ static void *cramfs_read(struct super_block *sb, unsigned int offset, unsigned i
 			continue;
 		blk_offset = (blocknr - buffer_blocknr[i]) << PAGE_CACHE_SHIFT;
 		blk_offset += offset;
-		if (blk_offset + len > BUFFER_SIZE)
+		if (blk_offset > BUFFER_SIZE ||
+		    blk_offset + len > BUFFER_SIZE)
 			continue;
 		return read_buffers[i] + blk_offset;
 	}
-- 
cgit v1.2.3


From dc0a989b23a65501827d02bb94dd6a8d6d3fc8c8 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Tue, 21 Aug 2018 09:42:03 +0800
Subject: btrfs: Handle owner mismatch gracefully when walking up tree

commit 65c6e82becec33731f48786e5a30f98662c86b16 upstream.

[BUG]
When mounting certain crafted image, btrfs will trigger kernel BUG_ON()
when trying to recover balance:

  kernel BUG at fs/btrfs/extent-tree.c:8956!
  invalid opcode: 0000 [#1] PREEMPT SMP NOPTI
  CPU: 1 PID: 662 Comm: mount Not tainted 4.18.0-rc1-custom+ #10
  RIP: 0010:walk_up_proc+0x336/0x480 [btrfs]
  RSP: 0018:ffffb53540c9b890 EFLAGS: 00010202
  Call Trace:
   walk_up_tree+0x172/0x1f0 [btrfs]
   btrfs_drop_snapshot+0x3a4/0x830 [btrfs]
   merge_reloc_roots+0xe1/0x1d0 [btrfs]
   btrfs_recover_relocation+0x3ea/0x420 [btrfs]
   open_ctree+0x1af3/0x1dd0 [btrfs]
   btrfs_mount_root+0x66b/0x740 [btrfs]
   mount_fs+0x3b/0x16a
   vfs_kern_mount.part.9+0x54/0x140
   btrfs_mount+0x16d/0x890 [btrfs]
   mount_fs+0x3b/0x16a
   vfs_kern_mount.part.9+0x54/0x140
   do_mount+0x1fd/0xda0
   ksys_mount+0xba/0xd0
   __x64_sys_mount+0x21/0x30
   do_syscall_64+0x60/0x210
   entry_SYSCALL_64_after_hwframe+0x49/0xbe

[CAUSE]
Extent tree corruption.  In this particular case, reloc tree root's
owner is DATA_RELOC_TREE (should be TREE_RELOC), thus its backref is
corrupted and we failed the owner check in walk_up_tree().

[FIX]
It's pretty hard to take care of every extent tree corruption, but at
least we can remove such BUG_ON() and exit more gracefully.

And since in this particular image, DATA_RELOC_TREE and TREE_RELOC share
the same root (which is obviously invalid), we needs to make
__del_reloc_root() more robust to detect such invalid sharing to avoid
possible NULL dereference as root->node can be NULL in this case.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=200411
Reported-by: Xu Wen <wen.xu@gatech.edu>
CC: stable@vger.kernel.org # 4.4+
Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/btrfs/extent-tree.c | 18 ++++++++++++------
 fs/btrfs/relocation.c  |  2 +-
 2 files changed, 13 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index a72f941ca750..722e11b318a7 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -8704,15 +8704,14 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
 	if (eb == root->node) {
 		if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
 			parent = eb->start;
-		else
-			BUG_ON(root->root_key.objectid !=
-			       btrfs_header_owner(eb));
+		else if (root->root_key.objectid != btrfs_header_owner(eb))
+			goto owner_mismatch;
 	} else {
 		if (wc->flags[level + 1] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
 			parent = path->nodes[level + 1]->start;
-		else
-			BUG_ON(root->root_key.objectid !=
-			       btrfs_header_owner(path->nodes[level + 1]));
+		else if (root->root_key.objectid !=
+			 btrfs_header_owner(path->nodes[level + 1]))
+			goto owner_mismatch;
 	}
 
 	btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1);
@@ -8720,6 +8719,11 @@ out:
 	wc->refs[level] = 0;
 	wc->flags[level] = 0;
 	return 0;
+
+owner_mismatch:
+	btrfs_err_rl(root->fs_info, "unexpected tree owner, have %llu expect %llu",
+		     btrfs_header_owner(eb), root->root_key.objectid);
+	return -EUCLEAN;
 }
 
 static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
@@ -8773,6 +8777,8 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans,
 			ret = walk_up_proc(trans, root, path, wc);
 			if (ret > 0)
 				return 0;
+			if (ret < 0)
+				return ret;
 
 			if (path->locks[level]) {
 				btrfs_tree_unlock_rw(path->nodes[level],
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index cfe913d2d3df..d6ccfb31aef0 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -1318,7 +1318,7 @@ static void __del_reloc_root(struct btrfs_root *root)
 	struct mapping_node *node = NULL;
 	struct reloc_control *rc = root->fs_info->reloc_ctl;
 
-	if (rc) {
+	if (rc && root->node) {
 		spin_lock(&rc->reloc_root_tree.lock);
 		rb_node = tree_search(&rc->reloc_root_tree.rb_root,
 				      root->node->start);
-- 
cgit v1.2.3


From ff58ad5f344d9cd6b706271781b2d11baa32e18d Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Tue, 21 Aug 2018 09:53:47 +0800
Subject: btrfs: locking: Add extra check in btrfs_init_new_buffer() to avoid
 deadlock

commit b72c3aba09a53fc7c1824250d71180ca154517a7 upstream.

[BUG]
For certain crafted image, whose csum root leaf has missing backref, if
we try to trigger write with data csum, it could cause deadlock with the
following kernel WARN_ON():

  WARNING: CPU: 1 PID: 41 at fs/btrfs/locking.c:230 btrfs_tree_lock+0x3e2/0x400
  CPU: 1 PID: 41 Comm: kworker/u4:1 Not tainted 4.18.0-rc1+ #8
  Workqueue: btrfs-endio-write btrfs_endio_write_helper
  RIP: 0010:btrfs_tree_lock+0x3e2/0x400
  Call Trace:
   btrfs_alloc_tree_block+0x39f/0x770
   __btrfs_cow_block+0x285/0x9e0
   btrfs_cow_block+0x191/0x2e0
   btrfs_search_slot+0x492/0x1160
   btrfs_lookup_csum+0xec/0x280
   btrfs_csum_file_blocks+0x2be/0xa60
   add_pending_csums+0xaf/0xf0
   btrfs_finish_ordered_io+0x74b/0xc90
   finish_ordered_fn+0x15/0x20
   normal_work_helper+0xf6/0x500
   btrfs_endio_write_helper+0x12/0x20
   process_one_work+0x302/0x770
   worker_thread+0x81/0x6d0
   kthread+0x180/0x1d0
   ret_from_fork+0x35/0x40

[CAUSE]
That crafted image has missing backref for csum tree root leaf.  And
when we try to allocate new tree block, since there is no
EXTENT/METADATA_ITEM for csum tree root, btrfs consider it's free slot
and use it.

The extent tree of the image looks like:

  Normal image                      |       This fuzzed image
  ----------------------------------+--------------------------------
  BG 29360128                       | BG 29360128
   One empty slot                   |  One empty slot
  29364224: backref to UUID tree    | 29364224: backref to UUID tree
   Two empty slots                  |  Two empty slots
  29376512: backref to CSUM tree    |  One empty slot (bad type) <<<
  29380608: backref to D_RELOC tree | 29380608: backref to D_RELOC tree
  ...                               | ...

Since bytenr 29376512 has no METADATA/EXTENT_ITEM, when btrfs try to
alloc tree block, it's an valid slot for btrfs.

And for finish_ordered_write, when we need to insert csum, we try to CoW
csum tree root.

By accident, empty slots at bytenr BG_OFFSET, BG_OFFSET + 8K,
BG_OFFSET + 12K is already used by tree block COW for other trees, the
next empty slot is BG_OFFSET + 16K, which should be the backref for CSUM
tree.

But due to the bad type, btrfs can recognize it and still consider it as
an empty slot, and will try to use it for csum tree CoW.

Then in the following call trace, we will try to lock the new tree
block, which turns out to be the old csum tree root which is already
locked:

btrfs_search_slot() called on csum tree root, which is at 29376512
|- btrfs_cow_block()
   |- btrfs_set_lock_block()
   |  |- Now locks tree block 29376512 (old csum tree root)
   |- __btrfs_cow_block()
      |- btrfs_alloc_tree_block()
         |- btrfs_reserve_extent()
            | Now it returns tree block 29376512, which extent tree
            | shows its empty slot, but it's already hold by csum tree
            |- btrfs_init_new_buffer()
               |- btrfs_tree_lock()
                  | Triggers WARN_ON(eb->lock_owner == current->pid)
                  |- wait_event()
                     Wait lock owner to release the lock, but it's
                     locked by ourself, so it will deadlock

[FIX]
This patch will do the lock_owner and current->pid check at
btrfs_init_new_buffer().
So above deadlock can be avoided.

Since such problem can only happen in crafted image, we will still
trigger kernel warning for later aborted transaction, but with a little
more meaningful warning message.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=200405
Reported-by: Xu Wen <wen.xu@gatech.edu>
CC: stable@vger.kernel.org # 4.4+
Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/btrfs/extent-tree.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'fs')

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 722e11b318a7..6f0ecaf09962 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -7835,6 +7835,20 @@ btrfs_init_new_buffer(struct btrfs_trans_handle *trans, struct btrfs_root *root,
 	buf = btrfs_find_create_tree_block(root, bytenr);
 	if (!buf)
 		return ERR_PTR(-ENOMEM);
+
+	/*
+	 * Extra safety check in case the extent tree is corrupted and extent
+	 * allocator chooses to use a tree block which is already used and
+	 * locked.
+	 */
+	if (buf->lock_owner == current->pid) {
+		btrfs_err_rl(root->fs_info,
+"tree block %llu owner %llu already locked by pid=%d, extent tree corruption detected",
+			buf->start, btrfs_header_owner(buf), current->pid);
+		free_extent_buffer(buf);
+		return ERR_PTR(-EUCLEAN);
+	}
+
 	btrfs_set_header_generation(buf, trans->transid);
 	btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level);
 	btrfs_tree_lock(buf);
-- 
cgit v1.2.3


From 98edddde5a2cccf6daf6428d9d03f82027ab62d8 Mon Sep 17 00:00:00 2001
From: Jeff Mahoney <jeffm@suse.com>
Date: Thu, 6 Sep 2018 17:18:14 -0400
Subject: btrfs: iterate all devices during trim, instead of
 fs_devices::alloc_list

commit d4e329de5e5e21594df2e0dd59da9acee71f133b upstream.

btrfs_trim_fs iterates over the fs_devices->alloc_list while holding the
device_list_mutex.  The problem is that ->alloc_list is protected by the
chunk mutex.  We don't want to hold the chunk mutex over the trim of the
entire file system.  Fortunately, the ->dev_list list is protected by
the dev_list mutex and while it will give us all devices, including
read-only devices, we already just skip the read-only devices.  Then we
can continue to take and release the chunk mutex while scanning each
device.

Fixes: 499f377f49f ("btrfs: iterate over unused chunk space in FITRIM")
CC: stable@vger.kernel.org # 4.4+
Signed-off-by: Jeff Mahoney <jeffm@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/btrfs/extent-tree.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 6f0ecaf09962..4ebfe130bee0 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -10751,8 +10751,8 @@ int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range)
 	}
 
 	mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
-	devices = &root->fs_info->fs_devices->alloc_list;
-	list_for_each_entry(device, devices, dev_alloc_list) {
+	devices = &root->fs_info->fs_devices->devices;
+	list_for_each_entry(device, devices, dev_list) {
 		ret = btrfs_trim_free_extents(device, range->minlen,
 					      &group_trimmed);
 		if (ret)
-- 
cgit v1.2.3


From 639a61c6bab798193e9e0a60dc2b988ae66847fd Mon Sep 17 00:00:00 2001
From: Jeff Mahoney <jeffm@suse.com>
Date: Thu, 6 Sep 2018 17:18:15 -0400
Subject: btrfs: don't attempt to trim devices that don't support it

commit 0be88e367fd8fbdb45257615d691f4675dda062f upstream.

We check whether any device the file system is using supports discard in
the ioctl call, but then we attempt to trim free extents on every device
regardless of whether discard is supported.  Due to the way we mask off
EOPNOTSUPP, we can end up issuing the trim operations on each free range
on devices that don't support it, just wasting time.

Fixes: 499f377f49f08 ("btrfs: iterate over unused chunk space in FITRIM")
CC: stable@vger.kernel.org # 4.4+
Signed-off-by: Jeff Mahoney <jeffm@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/btrfs/extent-tree.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'fs')

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 4ebfe130bee0..59f0664de70f 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -10629,6 +10629,10 @@ static int btrfs_trim_free_extents(struct btrfs_device *device,
 
 	*trimmed = 0;
 
+	/* Discard not supported = nothing to do. */
+	if (!blk_queue_discard(bdev_get_queue(device->bdev)))
+		return 0;
+
 	/* Not writeable = nothing to do. */
 	if (!device->writeable)
 		return 0;
-- 
cgit v1.2.3


From 6968f018a95ad377b644b2f4b449aa8a4f6620d8 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@toxicpanda.com>
Date: Wed, 12 Sep 2018 10:45:45 -0400
Subject: btrfs: wait on caching when putting the bg cache

commit 3aa7c7a31c26321696b92841d5103461c6f3f517 upstream.

While testing my backport I noticed there was a panic if I ran
generic/416 generic/417 generic/418 all in a row.  This just happened to
uncover a race where we had outstanding IO after we destroy all of our
workqueues, and then we'd go to queue the endio work on those free'd
workqueues.

This is because we aren't waiting for the caching threads to be done
before freeing everything up, so to fix this make sure we wait on any
outstanding caching that's being done before we free up the block group,
so we're sure to be done with all IO by the time we get to
btrfs_stop_all_workers().  This fixes the panic I was seeing
consistently in testing.

------------[ cut here ]------------
kernel BUG at fs/btrfs/volumes.c:6112!
SMP PTI
Modules linked in:
CPU: 1 PID: 27165 Comm: kworker/u4:7 Not tainted 4.16.0-02155-g3553e54a578d-dirty #875
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.11.0-2.el7 04/01/2014
Workqueue: btrfs-cache btrfs_cache_helper
RIP: 0010:btrfs_map_bio+0x346/0x370
RSP: 0000:ffffc900061e79d0 EFLAGS: 00010202
RAX: 0000000000000000 RBX: ffff880071542e00 RCX: 0000000000533000
RDX: ffff88006bb74380 RSI: 0000000000000008 RDI: ffff880078160000
RBP: 0000000000000001 R08: ffff8800781cd200 R09: 0000000000503000
R10: ffff88006cd21200 R11: 0000000000000000 R12: 0000000000000000
R13: 0000000000000000 R14: ffff8800781cd200 R15: ffff880071542e00
FS:  0000000000000000(0000) GS:ffff88007fd00000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 000000000817ffc4 CR3: 0000000078314000 CR4: 00000000000006e0
DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
Call Trace:
 btree_submit_bio_hook+0x8a/0xd0
 submit_one_bio+0x5d/0x80
 read_extent_buffer_pages+0x18a/0x320
 btree_read_extent_buffer_pages+0xbc/0x200
 ? alloc_extent_buffer+0x359/0x3e0
 read_tree_block+0x3d/0x60
 read_block_for_search.isra.30+0x1a5/0x360
 btrfs_search_slot+0x41b/0xa10
 btrfs_next_old_leaf+0x212/0x470
 caching_thread+0x323/0x490
 normal_work_helper+0xc5/0x310
 process_one_work+0x141/0x340
 worker_thread+0x44/0x3c0
 kthread+0xf8/0x130
 ? process_one_work+0x340/0x340
 ? kthread_bind+0x10/0x10
 ret_from_fork+0x35/0x40
RIP: btrfs_map_bio+0x346/0x370 RSP: ffffc900061e79d0
---[ end trace 827eb13e50846033 ]---
Kernel panic - not syncing: Fatal exception
Kernel Offset: disabled
---[ end Kernel panic - not syncing: Fatal exception

CC: stable@vger.kernel.org # 4.4+
Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-by: Omar Sandoval <osandov@fb.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/btrfs/extent-tree.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 59f0664de70f..528b68cdc350 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -9521,6 +9521,7 @@ void btrfs_put_block_group_cache(struct btrfs_fs_info *info)
 
 		block_group = btrfs_lookup_first_block_group(info, last);
 		while (block_group) {
+			wait_block_group_cache_done(block_group);
 			spin_lock(&block_group->lock);
 			if (block_group->iref)
 				break;
-- 
cgit v1.2.3


From 5e7a422384626abbd41ff229bbf18b7119053b9e Mon Sep 17 00:00:00 2001
From: Josef Bacik <jbacik@fb.com>
Date: Fri, 28 Sep 2018 07:18:00 -0400
Subject: btrfs: reset max_extent_size on clear in a bitmap

commit 553cceb49681d60975d00892877d4c871bf220f9 upstream.

We need to clear the max_extent_size when we clear bits from a bitmap
since it could have been from the range that contains the
max_extent_size.

CC: stable@vger.kernel.org # 4.4+
Reviewed-by: Liu Bo <bo.liu@linux.alibaba.com>
Signed-off-by: Josef Bacik <jbacik@fb.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/btrfs/free-space-cache.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 45934deacfd7..2b4c5f298325 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -1699,6 +1699,8 @@ static inline void __bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
 	bitmap_clear(info->bitmap, start, count);
 
 	info->bytes -= bytes;
+	if (info->max_extent_size > ctl->unit)
+		info->max_extent_size = 0;
 }
 
 static void bitmap_clear_bits(struct btrfs_free_space_ctl *ctl,
-- 
cgit v1.2.3


From da36a0a52348e6301dba61c5d764ac8275d173a1 Mon Sep 17 00:00:00 2001
From: Josef Bacik <josef@toxicpanda.com>
Date: Fri, 28 Sep 2018 07:18:02 -0400
Subject: btrfs: make sure we create all new block groups

commit 545e3366db823dc3342ca9d7fea803f829c9062f upstream.

Allocating new chunks modifies both the extent and chunk tree, which can
trigger new chunk allocations.  So instead of doing list_for_each_safe,
just do while (!list_empty()) so we make sure we don't exit with other
pending bg's still on our list.

CC: stable@vger.kernel.org # 4.4+
Reviewed-by: Omar Sandoval <osandov@fb.com>
Reviewed-by: Liu Bo <bo.liu@linux.alibaba.com>
Signed-off-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/btrfs/extent-tree.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 528b68cdc350..80cd28456f08 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -9912,7 +9912,7 @@ error:
 void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans,
 				       struct btrfs_root *root)
 {
-	struct btrfs_block_group_cache *block_group, *tmp;
+	struct btrfs_block_group_cache *block_group;
 	struct btrfs_root *extent_root = root->fs_info->extent_root;
 	struct btrfs_block_group_item item;
 	struct btrfs_key key;
@@ -9920,7 +9920,10 @@ void btrfs_create_pending_block_groups(struct btrfs_trans_handle *trans,
 	bool can_flush_pending_bgs = trans->can_flush_pending_bgs;
 
 	trans->can_flush_pending_bgs = false;
-	list_for_each_entry_safe(block_group, tmp, &trans->new_bgs, bg_list) {
+	while (!list_empty(&trans->new_bgs)) {
+		block_group = list_first_entry(&trans->new_bgs,
+					       struct btrfs_block_group_cache,
+					       bg_list);
 		if (ret)
 			goto next;
 
-- 
cgit v1.2.3


From b1223028e6882ba36430cfb328f66605c7034f1e Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Tue, 9 Oct 2018 15:05:29 +0100
Subject: Btrfs: fix wrong dentries after fsync of file that got its parent
 replaced

commit 0f375eed92b5a407657532637ed9652611a682f5 upstream.

In a scenario like the following:

  mkdir /mnt/A               # inode 258
  mkdir /mnt/B               # inode 259
  touch /mnt/B/bar           # inode 260

  sync

  mv /mnt/B/bar /mnt/A/bar
  mv -T /mnt/A /mnt/B
  fsync /mnt/B/bar

  <power fail>

After replaying the log we end up with file bar having 2 hard links, both
with the name 'bar' and one in the directory with inode number 258 and the
other in the directory with inode number 259. Also, we end up with the
directory inode 259 still existing and with the directory inode 258 still
named as 'A', instead of 'B'. In this scenario, file 'bar' should only
have one hard link, located at directory inode 258, the directory inode
259 should not exist anymore and the name for directory inode 258 should
be 'B'.

This incorrect behaviour happens because when attempting to log the old
parents of an inode, we skip any parents that no longer exist. Fix this
by forcing a full commit if an old parent no longer exists.

A test case for fstests follows soon.

CC: stable@vger.kernel.org # 4.4+
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/btrfs/tree-log.c | 30 +++++++++++++++++++++++++++---
 1 file changed, 27 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 2c7f9a5f8717..63f59f17c97e 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -5240,9 +5240,33 @@ static int btrfs_log_all_parents(struct btrfs_trans_handle *trans,
 
 			dir_inode = btrfs_iget(root->fs_info->sb, &inode_key,
 					       root, NULL);
-			/* If parent inode was deleted, skip it. */
-			if (IS_ERR(dir_inode))
-				continue;
+			/*
+			 * If the parent inode was deleted, return an error to
+			 * fallback to a transaction commit. This is to prevent
+			 * getting an inode that was moved from one parent A to
+			 * a parent B, got its former parent A deleted and then
+			 * it got fsync'ed, from existing at both parents after
+			 * a log replay (and the old parent still existing).
+			 * Example:
+			 *
+			 * mkdir /mnt/A
+			 * mkdir /mnt/B
+			 * touch /mnt/B/bar
+			 * sync
+			 * mv /mnt/B/bar /mnt/A/bar
+			 * mv -T /mnt/A /mnt/B
+			 * fsync /mnt/B/bar
+			 * <power fail>
+			 *
+			 * If we ignore the old parent B which got deleted,
+			 * after a log replay we would have file bar linked
+			 * at both parents and the old parent B would still
+			 * exist.
+			 */
+			if (IS_ERR(dir_inode)) {
+				ret = PTR_ERR(dir_inode);
+				goto out;
+			}
 
 			ret = btrfs_log_inode(trans, root, dir_inode,
 					      LOG_INODE_ALL, 0, LLONG_MAX, ctx);
-- 
cgit v1.2.3


From b5bb62e56134180d802649a2954163454e551b7a Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Fri, 10 Aug 2018 10:20:26 +0800
Subject: btrfs: qgroup: Dirty all qgroups before rescan

commit 9c7b0c2e8dbfbcd80a71e2cbfe02704f26c185c6 upstream.

[BUG]
In the following case, rescan won't zero out the number of qgroup 1/0:

  $ mkfs.btrfs -fq $DEV
  $ mount $DEV /mnt

  $ btrfs quota enable /mnt
  $ btrfs qgroup create 1/0 /mnt
  $ btrfs sub create /mnt/sub
  $ btrfs qgroup assign 0/257 1/0 /mnt

  $ dd if=/dev/urandom of=/mnt/sub/file bs=1k count=1000
  $ btrfs sub snap /mnt/sub /mnt/snap
  $ btrfs quota rescan -w /mnt
  $ btrfs qgroup show -pcre /mnt
  qgroupid         rfer         excl     max_rfer     max_excl parent  child
  --------         ----         ----     --------     -------- ------  -----
  0/5          16.00KiB     16.00KiB         none         none ---     ---
  0/257      1016.00KiB     16.00KiB         none         none 1/0     ---
  0/258      1016.00KiB     16.00KiB         none         none ---     ---
  1/0        1016.00KiB     16.00KiB         none         none ---     0/257

So far so good, but:

  $ btrfs qgroup remove 0/257 1/0 /mnt
  WARNING: quotas may be inconsistent, rescan needed
  $ btrfs quota rescan -w /mnt
  $ btrfs qgroup show -pcre  /mnt
  qgoupid         rfer         excl     max_rfer     max_excl parent  child
  --------         ----         ----     --------     -------- ------  -----
  0/5          16.00KiB     16.00KiB         none         none ---     ---
  0/257      1016.00KiB     16.00KiB         none         none ---     ---
  0/258      1016.00KiB     16.00KiB         none         none ---     ---
  1/0        1016.00KiB     16.00KiB         none         none ---     ---
	     ^^^^^^^^^^     ^^^^^^^^ not cleared

[CAUSE]
Before rescan we call qgroup_rescan_zero_tracking() to zero out all
qgroups' accounting numbers.

However we don't mark all qgroups dirty, but rely on rescan to do so.

If we have any high level qgroup without children, it won't be marked
dirty during rescan, since we cannot reach that qgroup.

This will cause QGROUP_INFO items of childless qgroups never get updated
in the quota tree, thus their numbers will stay the same in "btrfs
qgroup show" output.

[FIX]
Just mark all qgroups dirty in qgroup_rescan_zero_tracking(), so even if
we have childless qgroups, their QGROUP_INFO items will still get
updated during rescan.

Reported-by: Misono Tomohiro <misono.tomohiro@jp.fujitsu.com>
CC: stable@vger.kernel.org # 4.4+
Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: Misono Tomohiro <misono.tomohiro@jp.fujitsu.com>
Tested-by: Misono Tomohiro <misono.tomohiro@jp.fujitsu.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/btrfs/qgroup.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index a751937dded5..90e29d40aa82 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -2446,6 +2446,7 @@ qgroup_rescan_zero_tracking(struct btrfs_fs_info *fs_info)
 		qgroup->rfer_cmpr = 0;
 		qgroup->excl = 0;
 		qgroup->excl_cmpr = 0;
+		qgroup_dirty(fs_info, qgroup);
 	}
 	spin_unlock(&fs_info->qgroup_lock);
 }
-- 
cgit v1.2.3


From b38ace86f35a95e7e91f73dd2524da1fd7c3bc35 Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Sat, 13 Oct 2018 00:37:25 +0100
Subject: Btrfs: fix null pointer dereference on compressed write path error

commit 3527a018c00e5dbada2f9d7ed5576437b6dd5cfb upstream.

At inode.c:compress_file_range(), under the "free_pages_out" label, we can
end up dereferencing the "pages" pointer when it has a NULL value. This
case happens when "start" has a value of 0 and we fail to allocate memory
for the "pages" pointer. When that happens we jump to the "cont" label and
then enter the "if (start == 0)" branch where we immediately call the
cow_file_range_inline() function. If that function returns 0 (success
creating an inline extent) or an error (like -ENOMEM for example) we jump
to the "free_pages_out" label and then access "pages[i]" leading to a NULL
pointer dereference, since "nr_pages" has a value greater than zero at
that point.

Fix this by setting "nr_pages" to 0 when we fail to allocate memory for
the "pages" pointer.

Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=201119
Fixes: 771ed689d2cd ("Btrfs: Optimize compressed writeback and reads")
CC: stable@vger.kernel.org # 4.4+
Reviewed-by: Liu Bo <bo.liu@linux.alibaba.com>
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/btrfs/inode.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index b895be3d4311..383717ccecc7 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -481,6 +481,7 @@ again:
 		pages = kcalloc(nr_pages, sizeof(struct page *), GFP_NOFS);
 		if (!pages) {
 			/* just bail out to the uncompressed code */
+			nr_pages = 0;
 			goto cont;
 		}
 
-- 
cgit v1.2.3


From bddfd0a2d61e3f627b50d88b713769583f7d7b03 Mon Sep 17 00:00:00 2001
From: Josef Bacik <jbacik@fb.com>
Date: Fri, 12 Oct 2018 15:32:33 -0400
Subject: btrfs: set max_extent_size properly

commit ad22cf6ea47fa20fbe11ac324a0a15c0a9a4a2a9 upstream.

We can't use entry->bytes if our entry is a bitmap entry, we need to use
entry->max_extent_size in that case.  Fix up all the logic to make this
consistent.

CC: stable@vger.kernel.org # 4.4+
Signed-off-by: Josef Bacik <jbacik@fb.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/btrfs/free-space-cache.c | 30 ++++++++++++++++++++----------
 1 file changed, 20 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 2b4c5f298325..1aa897dd9ce3 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -1784,6 +1784,13 @@ static int search_bitmap(struct btrfs_free_space_ctl *ctl,
 	return -1;
 }
 
+static inline u64 get_max_extent_size(struct btrfs_free_space *entry)
+{
+	if (entry->bitmap)
+		return entry->max_extent_size;
+	return entry->bytes;
+}
+
 /* Cache the size of the max extent in bytes */
 static struct btrfs_free_space *
 find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes,
@@ -1805,8 +1812,8 @@ find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes,
 	for (node = &entry->offset_index; node; node = rb_next(node)) {
 		entry = rb_entry(node, struct btrfs_free_space, offset_index);
 		if (entry->bytes < *bytes) {
-			if (entry->bytes > *max_extent_size)
-				*max_extent_size = entry->bytes;
+			*max_extent_size = max(get_max_extent_size(entry),
+					       *max_extent_size);
 			continue;
 		}
 
@@ -1824,8 +1831,8 @@ find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes,
 		}
 
 		if (entry->bytes < *bytes + align_off) {
-			if (entry->bytes > *max_extent_size)
-				*max_extent_size = entry->bytes;
+			*max_extent_size = max(get_max_extent_size(entry),
+					       *max_extent_size);
 			continue;
 		}
 
@@ -1837,8 +1844,10 @@ find_free_space(struct btrfs_free_space_ctl *ctl, u64 *offset, u64 *bytes,
 				*offset = tmp;
 				*bytes = size;
 				return entry;
-			} else if (size > *max_extent_size) {
-				*max_extent_size = size;
+			} else {
+				*max_extent_size =
+					max(get_max_extent_size(entry),
+					    *max_extent_size);
 			}
 			continue;
 		}
@@ -2696,8 +2705,8 @@ static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
 
 	err = search_bitmap(ctl, entry, &search_start, &search_bytes, true);
 	if (err) {
-		if (search_bytes > *max_extent_size)
-			*max_extent_size = search_bytes;
+		*max_extent_size = max(get_max_extent_size(entry),
+				       *max_extent_size);
 		return 0;
 	}
 
@@ -2734,8 +2743,9 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
 
 	entry = rb_entry(node, struct btrfs_free_space, offset_index);
 	while (1) {
-		if (entry->bytes < bytes && entry->bytes > *max_extent_size)
-			*max_extent_size = entry->bytes;
+		if (entry->bytes < bytes)
+			*max_extent_size = max(get_max_extent_size(entry),
+					       *max_extent_size);
 
 		if (entry->bytes < bytes ||
 		    (!entry->bitmap && entry->offset < min_start)) {
-- 
cgit v1.2.3


From 55e464966baf3e57374540f058ab68d6dce15f32 Mon Sep 17 00:00:00 2001
From: Dominique Martinet <dominique.martinet@cea.fr>
Date: Sat, 8 Sep 2018 01:18:43 +0900
Subject: 9p locks: fix glock.client_id leak in do_lock

[ Upstream commit b4dc44b3cac9e8327e0655f530ed0c46f2e6214c ]

the 9p client code overwrites our glock.client_id pointing to a static
buffer by an allocated string holding the network provided value which
we do not care about; free and reset the value as appropriate.

This is almost identical to the leak in v9fs_file_getlock() fixed by
Al Viro in commit ce85dd58ad5a6 ("9p: we are leaking glock.client_id
in v9fs_file_getlock()"), which was returned as an error by a coverity
false positive -- while we are here attempt to make the code slightly
more robust to future change of the net/9p/client code and hopefully
more clear to coverity that there is no problem.

Link: http://lkml.kernel.org/r/1536339057-21974-5-git-send-email-asmadeus@codewreck.org
Signed-off-by: Dominique Martinet <dominique.martinet@cea.fr>
Signed-off-by: Sasha Levin <sashal@kernel.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/9p/vfs_file.c | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index 12ceaf52dae6..e7b3d2c4472d 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -204,6 +204,14 @@ static int v9fs_file_do_lock(struct file *filp, int cmd, struct file_lock *fl)
 			break;
 		if (schedule_timeout_interruptible(P9_LOCK_TIMEOUT) != 0)
 			break;
+		/*
+		 * p9_client_lock_dotl overwrites flock.client_id with the
+		 * server message, free and reuse the client name
+		 */
+		if (flock.client_id != fid->clnt->name) {
+			kfree(flock.client_id);
+			flock.client_id = fid->clnt->name;
+		}
 	}
 
 	/* map 9p status to VFS status */
@@ -235,6 +243,8 @@ out_unlock:
 		locks_lock_file_wait(filp, fl);
 		fl->fl_type = fl_type;
 	}
+	if (flock.client_id != fid->clnt->name)
+		kfree(flock.client_id);
 out:
 	return res;
 }
@@ -269,7 +279,7 @@ static int v9fs_file_getlock(struct file *filp, struct file_lock *fl)
 
 	res = p9_client_getlock_dotl(fid, &glock);
 	if (res < 0)
-		return res;
+		goto out;
 	/* map 9p lock type to os lock type */
 	switch (glock.type) {
 	case P9_LOCK_TYPE_RDLCK:
@@ -290,7 +300,9 @@ static int v9fs_file_getlock(struct file *filp, struct file_lock *fl)
 			fl->fl_end = glock.start + glock.length - 1;
 		fl->fl_pid = glock.proc_id;
 	}
-	kfree(glock.client_id);
+out:
+	if (glock.client_id != fid->clnt->name)
+		kfree(glock.client_id);
 	return res;
 }
 
-- 
cgit v1.2.3


From 7574afe0cfc0e103f309a721880d195f38b292e0 Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Tue, 25 Sep 2018 12:28:55 +0300
Subject: fuse: Fix use-after-free in fuse_dev_do_read()

commit bc78abbd55dd28e2287ec6d6502b842321a17c87 upstream.

We may pick freed req in this way:

[cpu0]                                  [cpu1]
fuse_dev_do_read()                      fuse_dev_do_write()
   list_move_tail(&req->list, ...);     ...
   spin_unlock(&fpq->lock);             ...
   ...                                  request_end(fc, req);
   ...                                    fuse_put_request(fc, req);
   if (test_bit(FR_INTERRUPTED, ...))
         queue_interrupt(fiq, req);

Fix that by keeping req alive until we finish all manipulations.

Reported-by: syzbot+4e975615ca01f2277bdd@syzkaller.appspotmail.com
Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Fixes: 46c34a348b0a ("fuse: no fc->lock for pqueue parts")
Cc: <stable@vger.kernel.org> # v4.2
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/fuse/dev.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 2671e922c720..f47253a0502b 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -1328,12 +1328,14 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file,
 		goto out_end;
 	}
 	list_move_tail(&req->list, &fpq->processing);
+	__fuse_get_request(req);
 	spin_unlock(&fpq->lock);
 	set_bit(FR_SENT, &req->flags);
 	/* matches barrier in request_wait_answer() */
 	smp_mb__after_atomic();
 	if (test_bit(FR_INTERRUPTED, &req->flags))
 		queue_interrupt(fiq, req);
+	fuse_put_request(fc, req);
 
 	return reqsize;
 
-- 
cgit v1.2.3


From 8bb4354af373a8af395450acd298f25ddb79d93b Mon Sep 17 00:00:00 2001
From: Kirill Tkhai <ktkhai@virtuozzo.com>
Date: Tue, 25 Sep 2018 12:52:42 +0300
Subject: fuse: Fix use-after-free in fuse_dev_do_write()

commit d2d2d4fb1f54eff0f3faa9762d84f6446a4bc5d0 upstream.

After we found req in request_find() and released the lock,
everything may happen with the req in parallel:

cpu0                              cpu1
fuse_dev_do_write()               fuse_dev_do_write()
  req = request_find(fpq, ...)    ...
  spin_unlock(&fpq->lock)         ...
  ...                             req = request_find(fpq, oh.unique)
  ...                             spin_unlock(&fpq->lock)
  queue_interrupt(&fc->iq, req);   ...
  ...                              ...
  ...                              ...
  request_end(fc, req);
    fuse_put_request(fc, req);
  ...                              queue_interrupt(&fc->iq, req);


Signed-off-by: Kirill Tkhai <ktkhai@virtuozzo.com>
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Fixes: 46c34a348b0a ("fuse: no fc->lock for pqueue parts")
Cc: <stable@vger.kernel.org> # v4.2
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/fuse/dev.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index f47253a0502b..78225f1f1902 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -1924,16 +1924,20 @@ static ssize_t fuse_dev_do_write(struct fuse_dev *fud,
 
 	/* Is it an interrupt reply? */
 	if (req->intr_unique == oh.unique) {
+		__fuse_get_request(req);
 		spin_unlock(&fpq->lock);
 
 		err = -EINVAL;
-		if (nbytes != sizeof(struct fuse_out_header))
+		if (nbytes != sizeof(struct fuse_out_header)) {
+			fuse_put_request(fc, req);
 			goto err_finish;
+		}
 
 		if (oh.error == -ENOSYS)
 			fc->no_interrupt = 1;
 		else if (oh.error == -EAGAIN)
 			queue_interrupt(&fc->iq, req);
+		fuse_put_request(fc, req);
 
 		fuse_copy_finish(cs);
 		return nbytes;
-- 
cgit v1.2.3


From 2fe23468dae467043183e301829827c17f65f45f Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Fri, 28 Sep 2018 16:43:22 +0200
Subject: fuse: fix blocked_waitq wakeup

commit 908a572b80f6e9577b45e81b3dfe2e22111286b8 upstream.

Using waitqueue_active() is racy.  Make sure we issue a wake_up()
unconditionally after storing into fc->blocked.  After that it's okay to
optimize with waitqueue_active() since the first wake up provides the
necessary barrier for all waiters, not the just the woken one.

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Fixes: 3c18ef8117f0 ("fuse: optimize wake_up")
Cc: <stable@vger.kernel.org> # v3.10
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/fuse/dev.c | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 78225f1f1902..da31954e1a99 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -402,12 +402,19 @@ static void request_end(struct fuse_conn *fc, struct fuse_req *req)
 	if (test_bit(FR_BACKGROUND, &req->flags)) {
 		spin_lock(&fc->lock);
 		clear_bit(FR_BACKGROUND, &req->flags);
-		if (fc->num_background == fc->max_background)
+		if (fc->num_background == fc->max_background) {
 			fc->blocked = 0;
-
-		/* Wake up next waiter, if any */
-		if (!fc->blocked && waitqueue_active(&fc->blocked_waitq))
 			wake_up(&fc->blocked_waitq);
+		} else if (!fc->blocked) {
+			/*
+			 * Wake up next waiter, if any.  It's okay to use
+			 * waitqueue_active(), as we've already synced up
+			 * fc->blocked with waiters with the wake_up() call
+			 * above.
+			 */
+			if (waitqueue_active(&fc->blocked_waitq))
+				wake_up(&fc->blocked_waitq);
+		}
 
 		if (fc->num_background == fc->congestion_threshold &&
 		    fc->connected && fc->bdi_initialized) {
-- 
cgit v1.2.3


From f04651b97aed95cc82fc49997de1e5b1a6990e97 Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Fri, 28 Sep 2018 16:43:22 +0200
Subject: fuse: set FR_SENT while locked

commit 4c316f2f3ff315cb48efb7435621e5bfb81df96d upstream.

Otherwise fuse_dev_do_write() could come in and finish off the request, and
the set_bit(FR_SENT, ...) could trigger the WARN_ON(test_bit(FR_SENT, ...))
in request_end().

Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Reported-by: syzbot+ef054c4d3f64cd7f7cec@syzkaller.appspotmai
Fixes: 46c34a348b0a ("fuse: no fc->lock for pqueue parts")
Cc: <stable@vger.kernel.org> # v4.2
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/fuse/dev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index da31954e1a99..18e2f54737d1 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -1336,8 +1336,8 @@ static ssize_t fuse_dev_do_read(struct fuse_dev *fud, struct file *file,
 	}
 	list_move_tail(&req->list, &fpq->processing);
 	__fuse_get_request(req);
-	spin_unlock(&fpq->lock);
 	set_bit(FR_SENT, &req->flags);
+	spin_unlock(&fpq->lock);
 	/* matches barrier in request_wait_answer() */
 	smp_mb__after_atomic();
 	if (test_bit(FR_INTERRUPTED, &req->flags))
-- 
cgit v1.2.3


From cac9339052667de0805dc079b7fd74432acff515 Mon Sep 17 00:00:00 2001
From: Michal Hocko <mhocko@suse.com>
Date: Mon, 23 May 2016 16:25:39 -0700
Subject: mm, elf: handle vm_brk error

commit ecc2bc8ac03884266cf73f8a2a42b911465b2fbc upstream.

load_elf_library doesn't handle vm_brk failure although nothing really
indicates it cannot do that because the function is allowed to fail due
to vm_mmap failures already.  This might be not a problem now but later
patch will make vm_brk killable (resp.  mmap_sem for write waiting will
become killable) and so the failure will be more probable.

Signed-off-by: Michal Hocko <mhocko@suse.com>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 fs/binfmt_elf.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 62bc72001fce..70ea4b9c6dd9 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1215,8 +1215,11 @@ static int load_elf_library(struct file *file)
 	len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
 			    ELF_MIN_ALIGN - 1);
 	bss = eppnt->p_memsz + eppnt->p_vaddr;
-	if (bss > len)
-		vm_brk(len, bss - len);
+	if (bss > len) {
+		error = vm_brk(len, bss - len);
+		if (BAD_ADDR(error))
+			goto out_free_ph;
+	}
 	error = 0;
 
 out_free_ph:
-- 
cgit v1.2.3


From 4a0c08d709536d12f0dc2a9e6360826a56c9bceb Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Tue, 2 Aug 2016 14:04:51 -0700
Subject: binfmt_elf: fix calculations for bss padding

commit 0036d1f7eb95bcc52977f15507f00dd07018e7e2 upstream.

A double-bug exists in the bss calculation code, where an overflow can
happen in the "last_bss - elf_bss" calculation, but vm_brk internally
aligns the argument, underflowing it, wrapping back around safe.  We
shouldn't depend on these bugs staying in sync, so this cleans up the
bss padding handling to avoid the overflow.

This moves the bss padzero() before the last_bss > elf_bss case, since
the zero-filling of the ELF_PAGE should have nothing to do with the
relationship of last_bss and elf_bss: any trailing portion should be
zeroed, and a zero size is already handled by padzero().

Then it handles the math on elf_bss vs last_bss correctly.  These need
to both be ELF_PAGE aligned to get the comparison correct, since that's
the expected granularity of the mappings.  Since elf_bss already had
alignment-based padding happen in padzero(), the "start" of the new
vm_brk() should be moved forward as done in the original code.  However,
since the "end" of the vm_brk() area will already become PAGE_ALIGNed in
vm_brk() then last_bss should get aligned here to avoid hiding it as a
side-effect.

Additionally makes a cosmetic change to the initial last_bss calculation
so it's easier to read in comparison to the load_addr calculation above
it (i.e.  the only difference is p_filesz vs p_memsz).

Link: http://lkml.kernel.org/r/1468014494-25291-2-git-send-email-keescook@chromium.org
Signed-off-by: Kees Cook <keescook@chromium.org>
Reported-by: Hector Marco-Gisbert <hecmargi@upv.es>
Cc: Ismael Ripoll Ripoll <iripoll@upv.es>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Chen Gang <gang.chen.5i5j@gmail.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Konstantin Khlebnikov <koct9i@gmail.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 fs/binfmt_elf.c | 34 ++++++++++++++++++----------------
 1 file changed, 18 insertions(+), 16 deletions(-)

(limited to 'fs')

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 70ea4b9c6dd9..2963a23f7a80 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -604,28 +604,30 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex,
 			 * Do the same thing for the memory mapping - between
 			 * elf_bss and last_bss is the bss section.
 			 */
-			k = load_addr + eppnt->p_memsz + eppnt->p_vaddr;
+			k = load_addr + eppnt->p_vaddr + eppnt->p_memsz;
 			if (k > last_bss)
 				last_bss = k;
 		}
 	}
 
+	/*
+	 * Now fill out the bss section: first pad the last page from
+	 * the file up to the page boundary, and zero it from elf_bss
+	 * up to the end of the page.
+	 */
+	if (padzero(elf_bss)) {
+		error = -EFAULT;
+		goto out;
+	}
+	/*
+	 * Next, align both the file and mem bss up to the page size,
+	 * since this is where elf_bss was just zeroed up to, and where
+	 * last_bss will end after the vm_brk() below.
+	 */
+	elf_bss = ELF_PAGEALIGN(elf_bss);
+	last_bss = ELF_PAGEALIGN(last_bss);
+	/* Finally, if there is still more bss to allocate, do it. */
 	if (last_bss > elf_bss) {
-		/*
-		 * Now fill out the bss section.  First pad the last page up
-		 * to the page boundary, and then perform a mmap to make sure
-		 * that there are zero-mapped pages up to and including the
-		 * last bss page.
-		 */
-		if (padzero(elf_bss)) {
-			error = -EFAULT;
-			goto out;
-		}
-
-		/* What we have mapped so far */
-		elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1);
-
-		/* Map the last of the bss segment */
 		error = vm_brk(elf_bss, last_bss - elf_bss);
 		if (BAD_ADDR(error))
 			goto out;
-- 
cgit v1.2.3


From a0b71d1b894556ae8e346596046e4f8bb0689f43 Mon Sep 17 00:00:00 2001
From: Oscar Salvador <osalvador@suse.de>
Date: Fri, 13 Jul 2018 16:59:13 -0700
Subject: fs, elf: make sure to page align bss in load_elf_library

commit 24962af7e1041b7e50c1bc71d8d10dc678c556b5 upstream.

The current code does not make sure to page align bss before calling
vm_brk(), and this can lead to a VM_BUG_ON() in __mm_populate() due to
the requested lenght not being correctly aligned.

Let us make sure to align it properly.

Kees: only applicable to CONFIG_USELIB kernels: 32-bit and configured
for libc5.

Link: http://lkml.kernel.org/r/20180705145539.9627-1-osalvador@techadventures.net
Signed-off-by: Oscar Salvador <osalvador@suse.de>
Reported-by: syzbot+5dcb560fe12aa5091c06@syzkaller.appspotmail.com
Tested-by: Tetsuo Handa <penguin-kernel@i-love.sakura.ne.jp>
Acked-by: Kees Cook <keescook@chromium.org>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Nicolas Pitre <nicolas.pitre@linaro.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Ben Hutchings <ben.hutchings@codethink.co.uk>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 fs/binfmt_elf.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 2963a23f7a80..f010d6c8dd14 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -1214,9 +1214,8 @@ static int load_elf_library(struct file *file)
 		goto out_free_ph;
 	}
 
-	len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr +
-			    ELF_MIN_ALIGN - 1);
-	bss = eppnt->p_memsz + eppnt->p_vaddr;
+	len = ELF_PAGEALIGN(eppnt->p_filesz + eppnt->p_vaddr);
+	bss = ELF_PAGEALIGN(eppnt->p_memsz + eppnt->p_vaddr);
 	if (bss > len) {
 		error = vm_brk(len, bss - len);
 		if (BAD_ADDR(error))
-- 
cgit v1.2.3


From 298ed64f1e889b87172e2f9ba807318dadc893d7 Mon Sep 17 00:00:00 2001
From: Changwei Ge <ge.changwei@h3c.com>
Date: Fri, 2 Nov 2018 15:48:15 -0700
Subject: ocfs2: fix a misuse a of brelse after failing ocfs2_check_dir_entry

commit 29aa30167a0a2e6045a0d6d2e89d8168132333d5 upstream.

Somehow, file system metadata was corrupted, which causes
ocfs2_check_dir_entry() to fail in function ocfs2_dir_foreach_blk_el().

According to the original design intention, if above happens we should
skip the problematic block and continue to retrieve dir entry.  But
there is obviouse misuse of brelse around related code.

After failure of ocfs2_check_dir_entry(), current code just moves to
next position and uses the problematic buffer head again and again
during which the problematic buffer head is released for multiple times.
I suppose, this a serious issue which is long-lived in ocfs2.  This may
cause other file systems which is also used in a the same host insane.

So we should also consider about bakcporting this patch into linux
-stable.

Link: http://lkml.kernel.org/r/HK2PR06MB045211675B43EED794E597B6D56E0@HK2PR06MB0452.apcprd06.prod.outlook.com
Signed-off-by: Changwei Ge <ge.changwei@h3c.com>
Suggested-by: Changkuo Shi <shi.changkuo@h3c.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Mark Fasheh <mark@fasheh.com>
Cc: Joel Becker <jlbec@evilplan.org>
Cc: Junxiao Bi <junxiao.bi@oracle.com>
Cc: Joseph Qi <jiangqi903@gmail.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ocfs2/dir.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/ocfs2/dir.c b/fs/ocfs2/dir.c
index ffecf89c8c1c..49af618e410d 100644
--- a/fs/ocfs2/dir.c
+++ b/fs/ocfs2/dir.c
@@ -1896,8 +1896,7 @@ static int ocfs2_dir_foreach_blk_el(struct inode *inode,
 				/* On error, skip the f_pos to the
 				   next block. */
 				ctx->pos = (ctx->pos | (sb->s_blocksize - 1)) + 1;
-				brelse(bh);
-				continue;
+				break;
 			}
 			if (le64_to_cpu(de->inode)) {
 				unsigned char d_type = DT_UNKNOWN;
-- 
cgit v1.2.3


From f02c3c34bda23e96df909d23625073539db19e3d Mon Sep 17 00:00:00 2001
From: Filipe Manana <fdmanana@suse.com>
Date: Mon, 5 Nov 2018 11:14:17 +0000
Subject: Btrfs: fix data corruption due to cloning of eof block

commit ac765f83f1397646c11092a032d4f62c3d478b81 upstream.

We currently allow cloning a range from a file which includes the last
block of the file even if the file's size is not aligned to the block
size. This is fine and useful when the destination file has the same size,
but when it does not and the range ends somewhere in the middle of the
destination file, it leads to corruption because the bytes between the EOF
and the end of the block have undefined data (when there is support for
discard/trimming they have a value of 0x00).

Example:

 $ mkfs.btrfs -f /dev/sdb
 $ mount /dev/sdb /mnt

 $ export foo_size=$((256 * 1024 + 100))
 $ xfs_io -f -c "pwrite -S 0x3c 0 $foo_size" /mnt/foo
 $ xfs_io -f -c "pwrite -S 0xb5 0 1M" /mnt/bar

 $ xfs_io -c "reflink /mnt/foo 0 512K $foo_size" /mnt/bar

 $ od -A d -t x1 /mnt/bar
 0000000 b5 b5 b5 b5 b5 b5 b5 b5 b5 b5 b5 b5 b5 b5 b5 b5
 *
 0524288 3c 3c 3c 3c 3c 3c 3c 3c 3c 3c 3c 3c 3c 3c 3c 3c
 *
 0786528 3c 3c 3c 3c 00 00 00 00 00 00 00 00 00 00 00 00
 0786544 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
 *
 0790528 b5 b5 b5 b5 b5 b5 b5 b5 b5 b5 b5 b5 b5 b5 b5 b5
 *
 1048576

The bytes in the range from 786532 (512Kb + 256Kb + 100 bytes) to 790527
(512Kb + 256Kb + 4Kb - 1) got corrupted, having now a value of 0x00 instead
of 0xb5.

This is similar to the problem we had for deduplication that got recently
fixed by commit de02b9f6bb65 ("Btrfs: fix data corruption when
deduplicating between different files").

Fix this by not allowing such operations to be performed and return the
errno -EINVAL to user space. This is what XFS is doing as well at the VFS
level. This change however now makes us return -EINVAL instead of
-EOPNOTSUPP for cases where the source range maps to an inline extent and
the destination range's end is smaller then the destination file's size,
since the detection of inline extents is done during the actual process of
dropping file extent items (at __btrfs_drop_extents()). Returning the
-EINVAL error is done early on and solely based on the input parameters
(offsets and length) and destination file's size. This makes us consistent
with XFS and anyone else supporting cloning since this case is now checked
at a higher level in the VFS and is where the -EINVAL will be returned
from starting with kernel 4.20 (the VFS changed was introduced in 4.20-rc1
by commit 07d19dc9fbe9 ("vfs: avoid problematic remapping requests into
partial EOF block"). So this change is more geared towards stable kernels,
as it's unlikely the new VFS checks get removed intentionally.

A test case for fstests follows soon, as well as an update to filter
existing tests that expect -EOPNOTSUPP to accept -EINVAL as well.

CC: <stable@vger.kernel.org> # 4.4+
Signed-off-by: Filipe Manana <fdmanana@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/btrfs/ioctl.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 6caeb946fc1d..150d3c891815 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -3950,9 +3950,17 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
 		goto out_unlock;
 	if (len == 0)
 		olen = len = src->i_size - off;
-	/* if we extend to eof, continue to block boundary */
-	if (off + len == src->i_size)
+	/*
+	 * If we extend to eof, continue to block boundary if and only if the
+	 * destination end offset matches the destination file's size, otherwise
+	 * we would be corrupting data by placing the eof block into the middle
+	 * of a file.
+	 */
+	if (off + len == src->i_size) {
+		if (!IS_ALIGNED(len, bs) && destoff + len < inode->i_size)
+			goto out_unlock;
 		len = ALIGN(src->i_size, bs) - off;
+	}
 
 	if (len == 0) {
 		ret = 0;
-- 
cgit v1.2.3


From 2eefc69471f17b4b65fc8ba8356b9ee3a7c523d0 Mon Sep 17 00:00:00 2001
From: Vasily Averin <vvs@virtuozzo.com>
Date: Sat, 3 Nov 2018 17:11:19 -0400
Subject: ext4: add missing brelse() update_backups()'s error path

commit ea0abbb648452cdb6e1734b702b6330a7448fcf8 upstream.

Fixes: ac27a0ec112a ("ext4: initial copy of files from ext3")
Signed-off-by: Vasily Averin <vvs@virtuozzo.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org # 2.6.19
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/resize.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 783280ebc2fe..550eb2dfe357 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -1095,8 +1095,10 @@ static void update_backups(struct super_block *sb, sector_t blk_off, char *data,
 			   backup_block, backup_block -
 			   ext4_group_first_block_no(sb, group));
 		BUFFER_TRACE(bh, "get_write_access");
-		if ((err = ext4_journal_get_write_access(handle, bh)))
+		if ((err = ext4_journal_get_write_access(handle, bh))) {
+			brelse(bh);
 			break;
+		}
 		lock_buffer(bh);
 		memcpy(bh->b_data, data, size);
 		if (rest)
-- 
cgit v1.2.3


From 88cc2d51d221bf5bbc3622a47b07ef0947cdbfa1 Mon Sep 17 00:00:00 2001
From: Vasily Averin <vvs@virtuozzo.com>
Date: Sat, 3 Nov 2018 16:22:10 -0400
Subject: ext4: add missing brelse() in set_flexbg_block_bitmap()'s error path

commit cea5794122125bf67559906a0762186cf417099c upstream.

Fixes: 33afdcc5402d ("ext4: add a function which sets up group blocks ...")
Cc: stable@kernel.org # 3.3
Signed-off-by: Vasily Averin <vvs@virtuozzo.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/resize.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 550eb2dfe357..9646d4b325cb 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -442,16 +442,18 @@ static int set_flexbg_block_bitmap(struct super_block *sb, handle_t *handle,
 
 		BUFFER_TRACE(bh, "get_write_access");
 		err = ext4_journal_get_write_access(handle, bh);
-		if (err)
+		if (err) {
+			brelse(bh);
 			return err;
+		}
 		ext4_debug("mark block bitmap %#04llx (+%llu/%u)\n", block,
 			   block - start, count2);
 		ext4_set_bits(bh->b_data, block - start, count2);
 
 		err = ext4_handle_dirty_metadata(handle, NULL, bh);
+		brelse(bh);
 		if (unlikely(err))
 			return err;
-		brelse(bh);
 	}
 
 	return 0;
-- 
cgit v1.2.3


From 47aa49f64f0f2b8db7de8eba2e89490e8371fd7e Mon Sep 17 00:00:00 2001
From: Vasily Averin <vvs@virtuozzo.com>
Date: Sat, 3 Nov 2018 16:50:08 -0400
Subject: ext4: add missing brelse() add_new_gdb_meta_bg()'s error path

commit 61a9c11e5e7a0dab5381afa5d9d4dd5ebf18f7a0 upstream.

Fixes: 01f795f9e0d6 ("ext4: add online resizing support for meta_bg ...")
Signed-off-by: Vasily Averin <vvs@virtuozzo.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org # 3.7
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/resize.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 9646d4b325cb..f87dc9e4999d 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -899,6 +899,7 @@ static int add_new_gdb_meta_bg(struct super_block *sb,
 				     sizeof(struct buffer_head *),
 				     GFP_NOFS);
 	if (!n_group_desc) {
+		brelse(gdb_bh);
 		err = -ENOMEM;
 		ext4_warning(sb, "not enough memory for %lu groups",
 			     gdb_num + 1);
@@ -914,8 +915,6 @@ static int add_new_gdb_meta_bg(struct super_block *sb,
 	kvfree(o_group_desc);
 	BUFFER_TRACE(gdb_bh, "get_write_access");
 	err = ext4_journal_get_write_access(handle, gdb_bh);
-	if (unlikely(err))
-		brelse(gdb_bh);
 	return err;
 }
 
-- 
cgit v1.2.3


From d507dfb5e4fe3e5b01259055c7a4fcaba114f89e Mon Sep 17 00:00:00 2001
From: Vasily Averin <vvs@virtuozzo.com>
Date: Sat, 3 Nov 2018 16:13:17 -0400
Subject: ext4: avoid potential extra brelse in setup_new_flex_group_blocks()

commit 9e4028935cca3f9ef9b6a90df9da6f1f94853536 upstream.

Currently bh is set to NULL only during first iteration of for cycle,
then this pointer is not cleared after end of using.
Therefore rollback after errors can lead to extra brelse(bh) call,
decrements bh counter and later trigger an unexpected warning in __brelse()

Patch moves brelse() calls in body of cycle to exclude requirement of
brelse() call in rollback.

Fixes: 33afdcc5402d ("ext4: add a function which sets up group blocks ...")
Signed-off-by: Vasily Averin <vvs@virtuozzo.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org # 3.3+
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/resize.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index f87dc9e4999d..37f275375089 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -590,7 +590,6 @@ handle_bb:
 		bh = bclean(handle, sb, block);
 		if (IS_ERR(bh)) {
 			err = PTR_ERR(bh);
-			bh = NULL;
 			goto out;
 		}
 		overhead = ext4_group_overhead_blocks(sb, group);
@@ -602,9 +601,9 @@ handle_bb:
 		ext4_mark_bitmap_end(group_data[i].blocks_count,
 				     sb->s_blocksize * 8, bh->b_data);
 		err = ext4_handle_dirty_metadata(handle, NULL, bh);
+		brelse(bh);
 		if (err)
 			goto out;
-		brelse(bh);
 
 handle_ib:
 		if (bg_flags[i] & EXT4_BG_INODE_UNINIT)
@@ -619,18 +618,16 @@ handle_ib:
 		bh = bclean(handle, sb, block);
 		if (IS_ERR(bh)) {
 			err = PTR_ERR(bh);
-			bh = NULL;
 			goto out;
 		}
 
 		ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb),
 				     sb->s_blocksize * 8, bh->b_data);
 		err = ext4_handle_dirty_metadata(handle, NULL, bh);
+		brelse(bh);
 		if (err)
 			goto out;
-		brelse(bh);
 	}
-	bh = NULL;
 
 	/* Mark group tables in block bitmap */
 	for (j = 0; j < GROUP_TABLE_COUNT; j++) {
@@ -661,7 +658,6 @@ handle_ib:
 	}
 
 out:
-	brelse(bh);
 	err2 = ext4_journal_stop(handle);
 	if (err2 && !err)
 		err = err2;
-- 
cgit v1.2.3


From 36ab0ab9b88d85fcb7b6a86db4b13cbbd52c208a Mon Sep 17 00:00:00 2001
From: Vasily Averin <vvs@virtuozzo.com>
Date: Tue, 6 Nov 2018 16:20:40 -0500
Subject: ext4: fix possible inode leak in the retry loop of ext4_resize_fs()

commit db6aee62406d9fbb53315fcddd81f1dc271d49fa upstream.

Fixes: 1c6bd7173d66 ("ext4: convert file system to meta_bg if needed ...")
Signed-off-by: Vasily Averin <vvs@virtuozzo.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org # 3.7
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/resize.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'fs')

diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 37f275375089..52c28d37112e 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -2026,6 +2026,10 @@ retry:
 		n_blocks_count_retry = 0;
 		free_flex_gd(flex_gd);
 		flex_gd = NULL;
+		if (resize_inode) {
+			iput(resize_inode);
+			resize_inode = NULL;
+		}
 		goto retry;
 	}
 
-- 
cgit v1.2.3


From e85d624f7164b51b3a9c081d0d9d7e3b5187dab8 Mon Sep 17 00:00:00 2001
From: Vasily Averin <vvs@virtuozzo.com>
Date: Tue, 6 Nov 2018 17:01:36 -0500
Subject: ext4: avoid buffer leak in ext4_orphan_add() after prior errors

commit feaf264ce7f8d54582e2f66eb82dd9dd124c94f3 upstream.

Fixes: d745a8c20c1f ("ext4: reduce contention on s_orphan_lock")
Fixes: 6e3617e579e0 ("ext4: Handle non empty on-disk orphan link")
Cc: Dmitry Monakhov <dmonakhov@gmail.com>
Signed-off-by: Vasily Averin <vvs@virtuozzo.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org # 2.6.34
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/namei.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index a1f1e53d0e25..f6a63029b24d 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -2830,7 +2830,9 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
 			list_del_init(&EXT4_I(inode)->i_orphan);
 			mutex_unlock(&sbi->s_orphan_lock);
 		}
-	}
+	} else
+		brelse(iloc.bh);
+
 	jbd_debug(4, "superblock will point to %lu\n", inode->i_ino);
 	jbd_debug(4, "orphan inode %lu will point to %d\n",
 			inode->i_ino, NEXT_ORPHAN(inode));
-- 
cgit v1.2.3


From 7f168837b121b47de9ce22201293819d65f5d6ac Mon Sep 17 00:00:00 2001
From: Vasily Averin <vvs@virtuozzo.com>
Date: Tue, 6 Nov 2018 16:16:01 -0500
Subject: ext4: fix missing cleanup if ext4_alloc_flex_bg_array() fails while
 resizing

commit f348e2241fb73515d65b5d77dd9c174128a7fbf2 upstream.

Fixes: 117fff10d7f1 ("ext4: grow the s_flex_groups array as needed ...")
Signed-off-by: Vasily Averin <vvs@virtuozzo.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org # 3.7
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/resize.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 52c28d37112e..82df738e5403 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -1990,7 +1990,7 @@ retry:
 
 	err = ext4_alloc_flex_bg_array(sb, n_group + 1);
 	if (err)
-		return err;
+		goto out;
 
 	err = ext4_mb_alloc_groupinfo(sb, n_group + 1);
 	if (err)
-- 
cgit v1.2.3


From 6371a05a1580651c8cd21bb1e49c4985f702c63d Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Tue, 6 Nov 2018 17:18:17 -0500
Subject: ext4: avoid possible double brelse() in add_new_gdb() on error path

commit 4f32c38b4662312dd3c5f113d8bdd459887fb773 upstream.

Fixes: b40971426a83 ("ext4: add error checking to calls to ...")
Reported-by: Vasily Averin <vvs@virtuozzo.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org # 2.6.38
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/resize.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 82df738e5403..bad13f049fb0 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -844,6 +844,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
 	err = ext4_handle_dirty_metadata(handle, NULL, gdb_bh);
 	if (unlikely(err)) {
 		ext4_std_error(sb, err);
+		iloc.bh = NULL;
 		goto exit_inode;
 	}
 	brelse(dind);
-- 
cgit v1.2.3


From 761ff77def544b52954334db2923feb1a947cebf Mon Sep 17 00:00:00 2001
From: Theodore Ts'o <tytso@mit.edu>
Date: Wed, 7 Nov 2018 10:32:53 -0500
Subject: ext4: fix possible leak of sbi->s_group_desc_leak in error path

commit 9e463084cdb22e0b56b2dfbc50461020409a5fd3 upstream.

Fixes: bfe0a5f47ada ("ext4: add more mount time checks of the superblock")
Reported-by: Vasily Averin <vvs@virtuozzo.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org # 4.18
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/super.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

(limited to 'fs')

diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index a3d905abbaa9..cd9cd581fd92 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3731,6 +3731,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 	sbi->s_groups_count = blocks_count;
 	sbi->s_blockfile_groups = min_t(ext4_group_t, sbi->s_groups_count,
 			(EXT4_MAX_BLOCK_FILE_PHYS / EXT4_BLOCKS_PER_GROUP(sb)));
+	if (((u64)sbi->s_groups_count * sbi->s_inodes_per_group) !=
+	    le32_to_cpu(es->s_inodes_count)) {
+		ext4_msg(sb, KERN_ERR, "inodes count not valid: %u vs %llu",
+			 le32_to_cpu(es->s_inodes_count),
+			 ((u64)sbi->s_groups_count * sbi->s_inodes_per_group));
+		ret = -EINVAL;
+		goto failed_mount;
+	}
 	db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) /
 		   EXT4_DESC_PER_BLOCK(sb);
 	if (ext4_has_feature_meta_bg(sb)) {
@@ -3750,14 +3758,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 		ret = -ENOMEM;
 		goto failed_mount;
 	}
-	if (((u64)sbi->s_groups_count * sbi->s_inodes_per_group) !=
-	    le32_to_cpu(es->s_inodes_count)) {
-		ext4_msg(sb, KERN_ERR, "inodes count not valid: %u vs %llu",
-			 le32_to_cpu(es->s_inodes_count),
-			 ((u64)sbi->s_groups_count * sbi->s_inodes_per_group));
-		ret = -EINVAL;
-		goto failed_mount;
-	}
 
 	bgl_lock_init(sbi->s_blockgroup_lock);
 
-- 
cgit v1.2.3


From 0d9902d43fb696d460c29ef4ee0b86ce6ddc9a15 Mon Sep 17 00:00:00 2001
From: Vasily Averin <vvs@virtuozzo.com>
Date: Wed, 7 Nov 2018 11:07:01 -0500
Subject: ext4: release bs.bh before re-using in ext4_xattr_block_find()

commit 45ae932d246f721e6584430017176cbcadfde610 upstream.

bs.bh was taken in previous ext4_xattr_block_find() call,
it should be released before re-using

Fixes: 7e01c8e5420b ("ext3/4: fix uninitialized bs in ...")
Signed-off-by: Vasily Averin <vvs@virtuozzo.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org # 2.6.26
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/xattr.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 16cf24dc9c34..1022c17fc1a1 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -1161,6 +1161,8 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
 			error = ext4_xattr_block_set(handle, inode, &i, &bs);
 		} else if (error == -ENOSPC) {
 			if (EXT4_I(inode)->i_file_acl && !bs.s.base) {
+				brelse(bs.bh);
+				bs.bh = NULL;
 				error = ext4_xattr_block_find(inode, &i, &bs);
 				if (error)
 					goto cleanup;
-- 
cgit v1.2.3


From 27acd8f869ba92e70408168ed2bed504a99b154c Mon Sep 17 00:00:00 2001
From: Vasily Averin <vvs@virtuozzo.com>
Date: Wed, 7 Nov 2018 11:10:21 -0500
Subject: ext4: fix buffer leak in ext4_xattr_move_to_block() on error path

commit 6bdc9977fcdedf47118d2caf7270a19f4b6d8a8f upstream.

Fixes: 3f2571c1f91f ("ext4: factor out xattr moving")
Fixes: 6dd4ee7cab7e ("ext4: Expand extra_inodes space per ...")
Reviewed-by: Jan Kara <jack@suse.cz>
Signed-off-by: Vasily Averin <vvs@virtuozzo.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org # 2.6.23
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/xattr.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 1022c17fc1a1..53679716baca 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -1490,6 +1490,8 @@ cleanup:
 	kfree(buffer);
 	if (is)
 		brelse(is->iloc.bh);
+	if (bs)
+		brelse(bs->bh);
 	kfree(is);
 	kfree(bs);
 	brelse(bh);
-- 
cgit v1.2.3


From 954ff0468f8bc17baed4673b2458cf8377f0b9ea Mon Sep 17 00:00:00 2001
From: Vasily Averin <vvs@virtuozzo.com>
Date: Wed, 7 Nov 2018 22:36:23 -0500
Subject: ext4: fix buffer leak in __ext4_read_dirblock() on error path

commit de59fae0043f07de5d25e02ca360f7d57bfa5866 upstream.

Fixes: dc6982ff4db1 ("ext4: refactor code to read directory blocks ...")
Signed-off-by: Vasily Averin <vvs@virtuozzo.com>
Signed-off-by: Theodore Ts'o <tytso@mit.edu>
Cc: stable@kernel.org # 3.9
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/ext4/namei.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'fs')

diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index f6a63029b24d..aa08e129149d 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -124,6 +124,7 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode,
 	if (!is_dx_block && type == INDEX) {
 		ext4_error_inode(inode, func, line, block,
 		       "directory leaf block found instead of index block");
+		brelse(bh);
 		return ERR_PTR(-EFSCORRUPTED);
 	}
 	if (!ext4_has_metadata_csum(inode->i_sb) ||
-- 
cgit v1.2.3


From 5becfca9f9714df49b06562b3b6ab434bbcfd3ae Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Mon, 22 Oct 2018 10:21:38 -0500
Subject: mount: Retest MNT_LOCKED in do_umount

commit 25d202ed820ee347edec0bf3bf553544556bf64b upstream.

It was recently pointed out that the one instance of testing MNT_LOCKED
outside of the namespace_sem is in ksys_umount.

Fix that by adding a test inside of do_umount with namespace_sem and
the mount_lock held.  As it helps to fail fails the existing test is
maintained with an additional comment pointing out that it may be racy
because the locks are not held.

Cc: stable@vger.kernel.org
Reported-by: Al Viro <viro@ZenIV.linux.org.uk>
Fixes: 5ff9d8a65ce8 ("vfs: Lock in place mounts from more privileged users")
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/namespace.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/namespace.c b/fs/namespace.c
index b56b50e3da11..416496e2f648 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1584,8 +1584,13 @@ static int do_umount(struct mount *mnt, int flags)
 
 	namespace_lock();
 	lock_mount_hash();
-	event++;
 
+	/* Recheck MNT_LOCKED with the locks held */
+	retval = -EINVAL;
+	if (mnt->mnt.mnt_flags & MNT_LOCKED)
+		goto out;
+
+	event++;
 	if (flags & MNT_DETACH) {
 		if (!list_empty(&mnt->mnt_list))
 			umount_tree(mnt, UMOUNT_PROPAGATE);
@@ -1599,6 +1604,7 @@ static int do_umount(struct mount *mnt, int flags)
 			retval = 0;
 		}
 	}
+out:
 	unlock_mount_hash();
 	namespace_unlock();
 	return retval;
@@ -1681,7 +1687,7 @@ SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
 		goto dput_and_out;
 	if (!check_mnt(mnt))
 		goto dput_and_out;
-	if (mnt->mnt.mnt_flags & MNT_LOCKED)
+	if (mnt->mnt.mnt_flags & MNT_LOCKED) /* Check optimistically */
 		goto dput_and_out;
 	retval = -EPERM;
 	if (flags & MNT_FORCE && !capable(CAP_SYS_ADMIN))
-- 
cgit v1.2.3


From 6258099fd2c9573cde4e3201bef5ef41a56618c5 Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Thu, 25 Oct 2018 09:04:18 -0500
Subject: mount: Don't allow copying MNT_UNBINDABLE|MNT_LOCKED mounts

commit df7342b240185d58d3d9665c0bbf0a0f5570ec29 upstream.

Jonathan Calmels from NVIDIA reported that he's able to bypass the
mount visibility security check in place in the Linux kernel by using
a combination of the unbindable property along with the private mount
propagation option to allow a unprivileged user to see a path which
was purposefully hidden by the root user.

Reproducer:
  # Hide a path to all users using a tmpfs
  root@castiana:~# mount -t tmpfs tmpfs /sys/devices/
  root@castiana:~#

  # As an unprivileged user, unshare user namespace and mount namespace
  stgraber@castiana:~$ unshare -U -m -r

  # Confirm the path is still not accessible
  root@castiana:~# ls /sys/devices/

  # Make /sys recursively unbindable and private
  root@castiana:~# mount --make-runbindable /sys
  root@castiana:~# mount --make-private /sys

  # Recursively bind-mount the rest of /sys over to /mnnt
  root@castiana:~# mount --rbind /sys/ /mnt

  # Access our hidden /sys/device as an unprivileged user
  root@castiana:~# ls /mnt/devices/
  breakpoint cpu cstate_core cstate_pkg i915 intel_pt isa kprobe
  LNXSYSTM:00 msr pci0000:00 platform pnp0 power software system
  tracepoint uncore_arb uncore_cbox_0 uncore_cbox_1 uprobe virtual

Solve this by teaching copy_tree to fail if a mount turns out to be
both unbindable and locked.

Cc: stable@vger.kernel.org
Fixes: 5ff9d8a65ce8 ("vfs: Lock in place mounts from more privileged users")
Reported-by: Jonathan Calmels <jcalmels@nvidia.com>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/namespace.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/namespace.c b/fs/namespace.c
index 416496e2f648..2d718d552a37 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1765,8 +1765,14 @@ struct mount *copy_tree(struct mount *mnt, struct dentry *dentry,
 		for (s = r; s; s = next_mnt(s, r)) {
 			if (!(flag & CL_COPY_UNBINDABLE) &&
 			    IS_MNT_UNBINDABLE(s)) {
-				s = skip_mnt_tree(s);
-				continue;
+				if (s->mnt.mnt_flags & MNT_LOCKED) {
+					/* Both unbindable and locked. */
+					q = ERR_PTR(-EPERM);
+					goto out;
+				} else {
+					s = skip_mnt_tree(s);
+					continue;
+				}
 			}
 			if (!(flag & CL_COPY_MNT_NS_FILE) &&
 			    is_mnt_ns_file(s->mnt.mnt_root)) {
-- 
cgit v1.2.3


From f7e6ee2e18cc745f05a9fe678de65ac24a6adcfd Mon Sep 17 00:00:00 2001
From: "Eric W. Biederman" <ebiederm@xmission.com>
Date: Thu, 25 Oct 2018 12:05:11 -0500
Subject: mount: Prevent MNT_DETACH from disconnecting locked mounts

commit 9c8e0a1b683525464a2abe9fb4b54404a50ed2b4 upstream.

Timothy Baldwin <timbaldwin@fastmail.co.uk> wrote:
> As per mount_namespaces(7) unprivileged users should not be able to look under mount points:
>
>   Mounts that come as a single unit from more privileged mount are locked
>   together and may not be separated in a less privileged mount namespace.
>
> However they can:
>
> 1. Create a mount namespace.
> 2. In the mount namespace open a file descriptor to the parent of a mount point.
> 3. Destroy the mount namespace.
> 4. Use the file descriptor to look under the mount point.
>
> I have reproduced this with Linux 4.16.18 and Linux 4.18-rc8.
>
> The setup:
>
> $ sudo sysctl kernel.unprivileged_userns_clone=1
> kernel.unprivileged_userns_clone = 1
> $ mkdir -p A/B/Secret
> $ sudo mount -t tmpfs hide A/B
>
>
> "Secret" is indeed hidden as expected:
>
> $ ls -lR A
> A:
> total 0
> drwxrwxrwt 2 root root 40 Feb 12 21:08 B
>
> A/B:
> total 0
>
>
> The attack revealing "Secret":
>
> $ unshare -Umr sh -c "exec unshare -m ls -lR /proc/self/fd/4/ 4<A"
> /proc/self/fd/4/:
> total 0
> drwxr-xr-x 3 root root 60 Feb 12 21:08 B
>
> /proc/self/fd/4/B:
> total 0
> drwxr-xr-x 2 root root 40 Feb 12 21:08 Secret
>
> /proc/self/fd/4/B/Secret:
> total 0

I tracked this down to put_mnt_ns running passing UMOUNT_SYNC and
disconnecting all of the mounts in a mount namespace.  Fix this by
factoring drop_mounts out of drop_collected_mounts and passing
0 instead of UMOUNT_SYNC.

There are two possible behavior differences that result from this.
- No longer setting UMOUNT_SYNC will no longer set MNT_SYNC_UMOUNT on
  the vfsmounts being unmounted.  This effects the lazy rcu walk by
  kicking the walk out of rcu mode and forcing it to be a non-lazy
  walk.
- No longer disconnecting locked mounts will keep some mounts around
  longer as they stay because the are locked to other mounts.

There are only two users of drop_collected mounts: audit_tree.c and
put_mnt_ns.

In audit_tree.c the mounts are private and there are no rcu lazy walks
only calls to iterate_mounts. So the changes should have no effect
except for a small timing effect as the connected mounts are disconnected.

In put_mnt_ns there may be references from process outside the mount
namespace to the mounts.  So the mounts remaining connected will
be the bug fix that is needed.  That rcu walks are allowed to continue
appears not to be a problem especially as the rcu walk change was about
an implementation detail not about semantics.

Cc: stable@vger.kernel.org
Fixes: 5ff9d8a65ce8 ("vfs: Lock in place mounts from more privileged users")
Reported-by: Timothy Baldwin <timbaldwin@fastmail.co.uk>
Tested-by: Timothy Baldwin <timbaldwin@fastmail.co.uk>
Signed-off-by: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/namespace.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/namespace.c b/fs/namespace.c
index 2d718d552a37..88c5d5bddf74 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1825,7 +1825,7 @@ void drop_collected_mounts(struct vfsmount *mnt)
 {
 	namespace_lock();
 	lock_mount_hash();
-	umount_tree(real_mount(mnt), UMOUNT_SYNC);
+	umount_tree(real_mount(mnt), 0);
 	unlock_mount_hash();
 	namespace_unlock();
 }
-- 
cgit v1.2.3


From 6023d16fdb84e849d4aa60c2bc1ea294a217d6cb Mon Sep 17 00:00:00 2001
From: Miklos Szeredi <mszeredi@redhat.com>
Date: Fri, 9 Nov 2018 15:52:16 +0100
Subject: fuse: fix leaked notify reply

commit 7fabaf303458fcabb694999d6fa772cc13d4e217 upstream.

fuse_request_send_notify_reply() may fail if the connection was reset for
some reason (e.g. fs was unmounted).  Don't leak request reference in this
case.  Besides leaking memory, this resulted in fc->num_waiting not being
decremented and hence fuse_wait_aborted() left in a hanging and unkillable
state.

Fixes: 2d45ba381a74 ("fuse: add retrieve request")
Fixes: b8f95e5d13f5 ("fuse: umount should wait for all requests")
Reported-and-tested-by: syzbot+6339eda9cb4ebbc4c37b@syzkaller.appspotmail.com
Signed-off-by: Miklos Szeredi <mszeredi@redhat.com>
Cc: <stable@vger.kernel.org> #v2.6.36
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/fuse/dev.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 18e2f54737d1..e566652ac922 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -1771,8 +1771,10 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
 	req->in.args[1].size = total_len;
 
 	err = fuse_request_send_notify_reply(fc, req, outarg->notify_unique);
-	if (err)
+	if (err) {
 		fuse_retrieve_end(fc, req);
+		fuse_put_request(fc, req);
+	}
 
 	return err;
 }
-- 
cgit v1.2.3


From 9546d6609e1095af6a5c8728238234e1b6b4d85c Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Sun, 1 Jul 2018 13:56:54 -0700
Subject: configfs: replace strncpy with memcpy

commit 1823342a1f2b47a4e6f5667f67cd28ab6bc4d6cd upstream.

gcc 8.1.0 complains:

fs/configfs/symlink.c:67:3: warning:
	'strncpy' output truncated before terminating nul copying as many
	bytes from a string as its length
fs/configfs/symlink.c: In function 'configfs_get_link':
fs/configfs/symlink.c:63:13: note: length computed here

Using strncpy() is indeed less than perfect since the length of data to
be copied has already been determined with strlen(). Replace strncpy()
with memcpy() to address the warning and optimize the code a little.

Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Nobuhiro Iwamatsu <nobuhiro.iwamatsu@cybertrust.co.jp>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/configfs/symlink.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/configfs/symlink.c b/fs/configfs/symlink.c
index 0525ebc3aea2..66e8c5d58b21 100644
--- a/fs/configfs/symlink.c
+++ b/fs/configfs/symlink.c
@@ -64,7 +64,7 @@ static void fill_item_path(struct config_item * item, char * buffer, int length)
 
 		/* back up enough to print this bus id with '/' */
 		length -= cur;
-		strncpy(buffer + length,config_item_name(p),cur);
+		memcpy(buffer + length, config_item_name(p), cur);
 		*(buffer + --length) = '/';
 	}
 }
-- 
cgit v1.2.3


From 235fbd9c76b5b96bf4b483b65c42e1c2388b3ae8 Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh@google.com>
Date: Tue, 30 Oct 2018 15:06:38 -0700
Subject: reiserfs: propagate errors from fill_with_dentries() properly

[ Upstream commit b10298d56c9623f9b173f19959732d3184b35f4f ]

fill_with_dentries() failed to propagate errors up to
reiserfs_for_each_xattr() properly.  Plumb them through.

Note that reiserfs_for_each_xattr() is only used by
reiserfs_delete_xattrs() and reiserfs_chown_xattrs().  The result of
reiserfs_delete_xattrs() is discarded anyway, the only difference there is
whether a warning is printed to dmesg.  The result of
reiserfs_chown_xattrs() does matter because it can block chowning of the
file to which the xattrs belong; but either way, the resulting state can
have misaligned ownership, so my patch doesn't improve things greatly.

Credit for making me look at this code goes to Al Viro, who pointed out
that the ->actor calling convention is suboptimal and should be changed.

Link: http://lkml.kernel.org/r/20180802163335.83312-1-jannh@google.com
Signed-off-by: Jann Horn <jannh@google.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Jeff Mahoney <jeffm@suse.com>
Cc: Eric Biggers <ebiggers@google.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 fs/reiserfs/xattr.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'fs')

diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 8b32fdaad468..d424b3d4bf3b 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -184,6 +184,7 @@ struct reiserfs_dentry_buf {
 	struct dir_context ctx;
 	struct dentry *xadir;
 	int count;
+	int err;
 	struct dentry *dentries[8];
 };
 
@@ -206,6 +207,7 @@ fill_with_dentries(struct dir_context *ctx, const char *name, int namelen,
 
 	dentry = lookup_one_len(name, dbuf->xadir, namelen);
 	if (IS_ERR(dentry)) {
+		dbuf->err = PTR_ERR(dentry);
 		return PTR_ERR(dentry);
 	} else if (d_really_is_negative(dentry)) {
 		/* A directory entry exists, but no file? */
@@ -214,6 +216,7 @@ fill_with_dentries(struct dir_context *ctx, const char *name, int namelen,
 			       "not found for file %pd.\n",
 			       dentry, dbuf->xadir);
 		dput(dentry);
+		dbuf->err = -EIO;
 		return -EIO;
 	}
 
@@ -261,6 +264,10 @@ static int reiserfs_for_each_xattr(struct inode *inode,
 		err = reiserfs_readdir_inode(d_inode(dir), &buf.ctx);
 		if (err)
 			break;
+		if (buf.err) {
+			err = buf.err;
+			break;
+		}
 		if (!buf.count)
 			break;
 		for (i = 0; !err && i < buf.count && buf.dentries[i]; i++) {
-- 
cgit v1.2.3


From b4cb4ecd6af40b77b39f8028325f36ceff8292fe Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ernesto=20A=2E=20Fern=C3=A1ndez?=
 <ernesto.mnd.fernandez@gmail.com>
Date: Tue, 30 Oct 2018 15:06:07 -0700
Subject: hfs: prevent btree data loss on root split
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[ Upstream commit d057c036672f33d43a5f7344acbb08cf3a8a0c09 ]

This bug is triggered whenever hfs_brec_update_parent() needs to split
the root node.  The height of the btree is not increased, which leaves
the new node orphaned and its records lost.  It is not possible for this
to happen on a valid hfs filesystem because the index nodes have fixed
length keys.

For reasons I ignore, the hfs module does have support for a number of
hfsplus features.  A corrupt btree header may report variable length
keys and trigger this bug, so it's better to fix it.

Link: http://lkml.kernel.org/r/9750b1415685c4adca10766895f6d5ef12babdb0.1535682463.git.ernesto.mnd.fernandez@gmail.com
Signed-off-by: Ernesto A. Fernández <ernesto.mnd.fernandez@gmail.com>
Cc: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 fs/hfs/brec.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'fs')

diff --git a/fs/hfs/brec.c b/fs/hfs/brec.c
index 2a6f3c67cb3f..2e713673df42 100644
--- a/fs/hfs/brec.c
+++ b/fs/hfs/brec.c
@@ -424,6 +424,10 @@ skip:
 	if (new_node) {
 		__be32 cnid;
 
+		if (!new_node->parent) {
+			hfs_btree_inc_height(tree);
+			new_node->parent = tree->root;
+		}
 		fd->bnode = hfs_bnode_find(tree, new_node->parent);
 		/* create index key and entry */
 		hfs_bnode_read_key(new_node, fd->search_key, 14);
-- 
cgit v1.2.3


From 5f427ca479c33907abceb3e767cf1a76a5f5bffd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ernesto=20A=2E=20Fern=C3=A1ndez?=
 <ernesto.mnd.fernandez@gmail.com>
Date: Tue, 30 Oct 2018 15:06:00 -0700
Subject: hfsplus: prevent btree data loss on root split
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

[ Upstream commit 0a3021d4f5295aa073c7bf5c5e4de60a2e292578 ]

Creating, renaming or deleting a file may cause catalog corruption and
data loss.  This bug is randomly triggered by xfstests generic/027, but
here is a faster reproducer:

  truncate -s 50M fs.iso
  mkfs.hfsplus fs.iso
  mount fs.iso /mnt
  i=100
  while [ $i -le 150 ]; do
    touch /mnt/$i &>/dev/null
    ((++i))
  done
  i=100
  while [ $i -le 150 ]; do
    mv /mnt/$i /mnt/$(perl -e "print $i x82") &>/dev/null
    ((++i))
  done
  umount /mnt
  fsck.hfsplus -n fs.iso

The bug is triggered whenever hfs_brec_update_parent() needs to split the
root node.  The height of the btree is not increased, which leaves the new
node orphaned and its records lost.

Link: http://lkml.kernel.org/r/26d882184fc43043a810114258f45277752186c7.1535682461.git.ernesto.mnd.fernandez@gmail.com
Signed-off-by: Ernesto A. Fernández <ernesto.mnd.fernandez@gmail.com>
Cc: Christoph Hellwig <hch@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 fs/hfsplus/brec.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'fs')

diff --git a/fs/hfsplus/brec.c b/fs/hfsplus/brec.c
index 754fdf8c6356..1002a0c08319 100644
--- a/fs/hfsplus/brec.c
+++ b/fs/hfsplus/brec.c
@@ -427,6 +427,10 @@ skip:
 	if (new_node) {
 		__be32 cnid;
 
+		if (!new_node->parent) {
+			hfs_btree_inc_height(tree);
+			new_node->parent = tree->root;
+		}
 		fd->bnode = hfs_bnode_find(tree, new_node->parent);
 		/* create index key and entry */
 		hfs_bnode_read_key(new_node, fd->search_key, 14);
-- 
cgit v1.2.3


From b45cb18117893965cbe3191f4f308aa7dd3cd689 Mon Sep 17 00:00:00 2001
From: Chengguang Xu <cgxu519@gmx.com>
Date: Wed, 13 Jun 2018 12:05:13 +0800
Subject: fs/exofs: fix potential memory leak in mount option parsing

[ Upstream commit 515f1867addaba49c1c6ac73abfaffbc192c1db4 ]

There are some cases can cause memory leak when parsing
option 'osdname'.

Signed-off-by: Chengguang Xu <cgxu519@gmx.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 fs/exofs/super.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/exofs/super.c b/fs/exofs/super.c
index b795c567b5e1..360ba74e04e6 100644
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -100,6 +100,7 @@ static int parse_options(char *options, struct exofs_mountopt *opts)
 		token = match_token(p, tokens, args);
 		switch (token) {
 		case Opt_name:
+			kfree(opts->dev_name);
 			opts->dev_name = match_strdup(&args[0]);
 			if (unlikely(!opts->dev_name)) {
 				EXOFS_ERR("Error allocating dev_name");
@@ -868,8 +869,10 @@ static struct dentry *exofs_mount(struct file_system_type *type,
 	int ret;
 
 	ret = parse_options(data, &opts);
-	if (ret)
+	if (ret) {
+		kfree(opts.dev_name);
 		return ERR_PTR(ret);
+	}
 
 	if (!opts.dev_name)
 		opts.dev_name = dev_name;
-- 
cgit v1.2.3


From 00112fda0835dda03eb88ad989d735334af9ecbf Mon Sep 17 00:00:00 2001
From: Andreas Gruenbacher <agruenba@redhat.com>
Date: Mon, 19 Nov 2018 17:24:36 +0100
Subject: gfs2: Put bitmap buffers in put_super

commit 10283ea525d30f2e99828978fd04d8427876a7ad upstream.

gfs2_put_super calls gfs2_clear_rgrpd to destroy the gfs2_rgrpd objects
attached to the resource group glocks.  That function should release the
buffers attached to the gfs2_bitmap objects (bi_bh), but the call to
gfs2_rgrp_brelse for doing that is missing.

When gfs2_releasepage later runs across these buffers which are still
referenced, it refuses to free them.  This causes the pages the buffers
are attached to to remain referenced as well.  With enough mount/unmount
cycles, the system will eventually run out of memory.

Fix this by adding the missing call to gfs2_rgrp_brelse in
gfs2_clear_rgrpd.

(Also fix a gfs2_rgrp_relse -> gfs2_rgrp_brelse typo in a comment.)

Fixes: 39b0f1e92908 ("GFS2: Don't brelse rgrp buffer_heads every allocation")
Cc: stable@vger.kernel.org # v4.4
Signed-off-by: Andreas Gruenbacher <agruenba@redhat.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 fs/gfs2/rgrp.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index c134c0462cee..ef24894edecc 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -732,6 +732,7 @@ void gfs2_clear_rgrpd(struct gfs2_sbd *sdp)
 			spin_lock(&gl->gl_lockref.lock);
 			gl->gl_object = NULL;
 			spin_unlock(&gl->gl_lockref.lock);
+			gfs2_rgrp_brelse(rgd);
 			gfs2_glock_add_to_lru(gl);
 			gfs2_glock_put(gl);
 		}
@@ -1139,7 +1140,7 @@ static u32 count_unlinked(struct gfs2_rgrpd *rgd)
  * @rgd: the struct gfs2_rgrpd describing the RG to read in
  *
  * Read in all of a Resource Group's header and bitmap blocks.
- * Caller must eventually call gfs2_rgrp_relse() to free the bitmaps.
+ * Caller must eventually call gfs2_rgrp_brelse() to free the bitmaps.
  *
  * Returns: errno
  */
-- 
cgit v1.2.3


From 37944cba362d17a3d8ff1180f9fd2047299e9f57 Mon Sep 17 00:00:00 2001
From: Lu Fengqi <lufq.fnst@cn.fujitsu.com>
Date: Tue, 20 Nov 2018 11:25:10 +0800
Subject: btrfs: fix pinned underflow after transaction aborted

commit fcd5e74288f7d36991b1f0fb96b8c57079645e38 upstream.

When running generic/475, we may get the following warning in dmesg:

[ 6902.102154] WARNING: CPU: 3 PID: 18013 at fs/btrfs/extent-tree.c:9776 btrfs_free_block_groups+0x2af/0x3b0 [btrfs]
[ 6902.109160] CPU: 3 PID: 18013 Comm: umount Tainted: G        W  O      4.19.0-rc8+ #8
[ 6902.110971] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015
[ 6902.112857] RIP: 0010:btrfs_free_block_groups+0x2af/0x3b0 [btrfs]
[ 6902.118921] RSP: 0018:ffffc9000459bdb0 EFLAGS: 00010286
[ 6902.120315] RAX: ffff880175050bb0 RBX: ffff8801124a8000 RCX: 0000000000170007
[ 6902.121969] RDX: 0000000000000002 RSI: 0000000000170007 RDI: ffffffff8125fb74
[ 6902.123716] RBP: ffff880175055d10 R08: 0000000000000000 R09: 0000000000000000
[ 6902.125417] R10: 0000000000000000 R11: 0000000000000000 R12: ffff880175055d88
[ 6902.127129] R13: ffff880175050bb0 R14: 0000000000000000 R15: dead000000000100
[ 6902.129060] FS:  00007f4507223780(0000) GS:ffff88017ba00000(0000) knlGS:0000000000000000
[ 6902.130996] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 6902.132558] CR2: 00005623599cac78 CR3: 000000014b700001 CR4: 00000000003606e0
[ 6902.134270] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[ 6902.135981] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[ 6902.137836] Call Trace:
[ 6902.138939]  close_ctree+0x171/0x330 [btrfs]
[ 6902.140181]  ? kthread_stop+0x146/0x1f0
[ 6902.141277]  generic_shutdown_super+0x6c/0x100
[ 6902.142517]  kill_anon_super+0x14/0x30
[ 6902.143554]  btrfs_kill_super+0x13/0x100 [btrfs]
[ 6902.144790]  deactivate_locked_super+0x2f/0x70
[ 6902.146014]  cleanup_mnt+0x3b/0x70
[ 6902.147020]  task_work_run+0x9e/0xd0
[ 6902.148036]  do_syscall_64+0x470/0x600
[ 6902.149142]  ? trace_hardirqs_off_thunk+0x1a/0x1c
[ 6902.150375]  entry_SYSCALL_64_after_hwframe+0x49/0xbe
[ 6902.151640] RIP: 0033:0x7f45077a6a7b
[ 6902.157324] RSP: 002b:00007ffd589f3e68 EFLAGS: 00000246 ORIG_RAX: 00000000000000a6
[ 6902.159187] RAX: 0000000000000000 RBX: 000055e8eec732b0 RCX: 00007f45077a6a7b
[ 6902.160834] RDX: 0000000000000001 RSI: 0000000000000000 RDI: 000055e8eec73490
[ 6902.162526] RBP: 0000000000000000 R08: 000055e8eec734b0 R09: 00007ffd589f26c0
[ 6902.164141] R10: 0000000000000000 R11: 0000000000000246 R12: 000055e8eec73490
[ 6902.165815] R13: 00007f4507ac61a4 R14: 0000000000000000 R15: 00007ffd589f40d8
[ 6902.167553] irq event stamp: 0
[ 6902.168998] hardirqs last  enabled at (0): [<0000000000000000>]           (null)
[ 6902.170731] hardirqs last disabled at (0): [<ffffffff810cd810>] copy_process.part.55+0x3b0/0x1f00
[ 6902.172773] softirqs last  enabled at (0): [<ffffffff810cd810>] copy_process.part.55+0x3b0/0x1f00
[ 6902.174671] softirqs last disabled at (0): [<0000000000000000>]           (null)
[ 6902.176407] ---[ end trace 463138c2986b275c ]---
[ 6902.177636] BTRFS info (device dm-3): space_info 4 has 273465344 free, is not full
[ 6902.179453] BTRFS info (device dm-3): space_info total=276824064, used=4685824, pinned=18446744073708158976, reserved=0, may_use=0, readonly=65536

In the above line there's "pinned=18446744073708158976" which is an
unsigned u64 value of -1392640, an obvious underflow.

When transaction_kthread is running cleanup_transaction(), another
fsstress is running btrfs_commit_transaction(). The
btrfs_finish_extent_commit() may get the same range as
btrfs_destroy_pinned_extent() got, which causes the pinned underflow.

Fixes: d4b450cd4b33 ("Btrfs: fix race between transaction commit and empty block group removal")
CC: stable@vger.kernel.org # 4.4+
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Signed-off-by: Lu Fengqi <lufq.fnst@cn.fujitsu.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Signed-off-by: Sasha Levin <sashal@kernel.org>
---
 fs/btrfs/disk-io.c | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 8dbb00fbb00b..b0875ef48522 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -4333,6 +4333,7 @@ static int btrfs_destroy_marked_extents(struct btrfs_root *root,
 static int btrfs_destroy_pinned_extent(struct btrfs_root *root,
 				       struct extent_io_tree *pinned_extents)
 {
+	struct btrfs_fs_info *fs_info = root->fs_info;
 	struct extent_io_tree *unpin;
 	u64 start;
 	u64 end;
@@ -4342,21 +4343,31 @@ static int btrfs_destroy_pinned_extent(struct btrfs_root *root,
 	unpin = pinned_extents;
 again:
 	while (1) {
+		/*
+		 * The btrfs_finish_extent_commit() may get the same range as
+		 * ours between find_first_extent_bit and clear_extent_dirty.
+		 * Hence, hold the unused_bg_unpin_mutex to avoid double unpin
+		 * the same extent range.
+		 */
+		mutex_lock(&fs_info->unused_bg_unpin_mutex);
 		ret = find_first_extent_bit(unpin, 0, &start, &end,
 					    EXTENT_DIRTY, NULL);
-		if (ret)
+		if (ret) {
+			mutex_unlock(&fs_info->unused_bg_unpin_mutex);
 			break;
+		}
 
 		clear_extent_dirty(unpin, start, end, GFP_NOFS);
 		btrfs_error_unpin_extent_range(root, start, end);
+		mutex_unlock(&fs_info->unused_bg_unpin_mutex);
 		cond_resched();
 	}
 
 	if (loop) {
-		if (unpin == &root->fs_info->freed_extents[0])
-			unpin = &root->fs_info->freed_extents[1];
+		if (unpin == &fs_info->freed_extents[0])
+			unpin = &fs_info->freed_extents[1];
 		else
-			unpin = &root->fs_info->freed_extents[0];
+			unpin = &fs_info->freed_extents[0];
 		loop = false;
 		goto again;
 	}
-- 
cgit v1.2.3


From ebd55db37fa7856ddc182da5587dd2189940d987 Mon Sep 17 00:00:00 2001
From: Dominique Martinet <dominique.martinet@cea.fr>
Date: Mon, 27 Aug 2018 15:12:05 +0900
Subject: v9fs_dir_readdir: fix double-free on p9stat_read error

commit 81c99089bce693b94b775b6eb888115d2d540086 upstream.

p9stat_read will call p9stat_free on error, we should only free the
struct content on success.

There also is no need to "p9stat_init" st as the read function will
zero the whole struct for us anyway, so clean up the code a bit while
we are here.

Link: http://lkml.kernel.org/r/1535410108-20650-1-git-send-email-asmadeus@codewreck.org
Signed-off-by: Dominique Martinet <dominique.martinet@cea.fr>
Reported-by: syzbot+d4252148d198410b864f@syzkaller.appspotmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/9p/vfs_dir.c | 11 -----------
 1 file changed, 11 deletions(-)

(limited to 'fs')

diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index 5cc00e56206e..7d889f56b8e7 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -76,15 +76,6 @@ static inline int dt_type(struct p9_wstat *mistat)
 	return rettype;
 }
 
-static void p9stat_init(struct p9_wstat *stbuf)
-{
-	stbuf->name  = NULL;
-	stbuf->uid   = NULL;
-	stbuf->gid   = NULL;
-	stbuf->muid  = NULL;
-	stbuf->extension = NULL;
-}
-
 /**
  * v9fs_alloc_rdir_buf - Allocate buffer used for read and readdir
  * @filp: opened file structure
@@ -145,12 +136,10 @@ static int v9fs_dir_readdir(struct file *file, struct dir_context *ctx)
 			rdir->tail = n;
 		}
 		while (rdir->head < rdir->tail) {
-			p9stat_init(&st);
 			err = p9stat_read(fid->clnt, rdir->buf + rdir->head,
 					  rdir->tail - rdir->head, &st);
 			if (err) {
 				p9_debug(P9_DEBUG_VFS, "returned %d\n", err);
-				p9stat_free(&st);
 				return -EIO;
 			}
 			reclen = st.size+2;
-- 
cgit v1.2.3


From 9891fda685f1a9da69012bb39aad81b67870cf5e Mon Sep 17 00:00:00 2001
From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Date: Fri, 2 Nov 2018 15:48:42 -0700
Subject: bfs: add sanity check at bfs_fill_super()

commit 9f2df09a33aa2c76ce6385d382693f98d7f2f07e upstream.

syzbot is reporting too large memory allocation at bfs_fill_super() [1].
Since file system image is corrupted such that bfs_sb->s_start == 0,
bfs_fill_super() is trying to allocate 8MB of continuous memory. Fix
this by adding a sanity check on bfs_sb->s_start, __GFP_NOWARN and
printf().

[1] https://syzkaller.appspot.com/bug?id=16a87c236b951351374a84c8a32f40edbc034e96

Link: http://lkml.kernel.org/r/1525862104-3407-1-git-send-email-penguin-kernel@I-love.SAKURA.ne.jp
Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Reported-by: syzbot <syzbot+71c6b5d68e91149fc8a4@syzkaller.appspotmail.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Tigran Aivazian <aivazian.tigran@gmail.com>
Cc: Matthew Wilcox <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/bfs/inode.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/bfs/inode.c b/fs/bfs/inode.c
index fdcb4d69f430..4714c55c1ae5 100644
--- a/fs/bfs/inode.c
+++ b/fs/bfs/inode.c
@@ -350,7 +350,8 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
 
 	s->s_magic = BFS_MAGIC;
 
-	if (le32_to_cpu(bfs_sb->s_start) > le32_to_cpu(bfs_sb->s_end)) {
+	if (le32_to_cpu(bfs_sb->s_start) > le32_to_cpu(bfs_sb->s_end) ||
+	    le32_to_cpu(bfs_sb->s_start) < BFS_BSIZE) {
 		printf("Superblock is corrupted\n");
 		goto out1;
 	}
@@ -359,9 +360,11 @@ static int bfs_fill_super(struct super_block *s, void *data, int silent)
 					sizeof(struct bfs_inode)
 					+ BFS_ROOT_INO - 1;
 	imap_len = (info->si_lasti / 8) + 1;
-	info->si_imap = kzalloc(imap_len, GFP_KERNEL);
-	if (!info->si_imap)
+	info->si_imap = kzalloc(imap_len, GFP_KERNEL | __GFP_NOWARN);
+	if (!info->si_imap) {
+		printf("Cannot allocate %u bytes\n", imap_len);
 		goto out1;
+	}
 	for (i = 0; i < BFS_ROOT_INO; i++)
 		set_bit(i, info->si_imap);
 
-- 
cgit v1.2.3


From b689a81a4c2ac139a3fdb92b08aab6dd73bc6108 Mon Sep 17 00:00:00 2001
From: Andrew Price <anprice@redhat.com>
Date: Mon, 8 Oct 2018 07:52:43 -0500
Subject: gfs2: Don't leave s_fs_info pointing to freed memory in init_sbd

commit 4c62bd9cea7bcf10292f7e4c57a2bca332942697 upstream.

When alloc_percpu() fails, sdp gets freed but sb->s_fs_info still points
to the same address. Move the assignment after that error check so that
s_fs_info can only point to a valid sdp or NULL, which is checked for
later in the error path, in gfs2_kill_super().

Reported-by: syzbot+dcb8b3587445007f5808@syzkaller.appspotmail.com
Signed-off-by: Andrew Price <anprice@redhat.com>
Signed-off-by: Bob Peterson <rpeterso@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/gfs2/ops_fstype.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index d9178388cf48..de7143e2b361 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -71,13 +71,13 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
 	if (!sdp)
 		return NULL;
 
-	sb->s_fs_info = sdp;
 	sdp->sd_vfs = sb;
 	sdp->sd_lkstats = alloc_percpu(struct gfs2_pcpu_lkstats);
 	if (!sdp->sd_lkstats) {
 		kfree(sdp);
 		return NULL;
 	}
+	sb->s_fs_info = sdp;
 
 	set_bit(SDF_NOJOURNALID, &sdp->sd_flags);
 	gfs2_tune_init(&sdp->sd_tune);
-- 
cgit v1.2.3


From 522ac5232ad7b9cd644ebbd9d7ec476e0c148284 Mon Sep 17 00:00:00 2001
From: Qu Wenruo <wqu@suse.com>
Date: Fri, 7 Sep 2018 14:16:24 +0800
Subject: btrfs: Ensure btrfs_trim_fs can trim the whole filesystem

commit 6ba9fc8e628becf0e3ec94083450d089b0dec5f5 upstream.

[BUG]
fstrim on some btrfs only trims the unallocated space, not trimming any
space in existing block groups.

[CAUSE]
Before fstrim_range passed to btrfs_trim_fs(), it gets truncated to
range [0, super->total_bytes).  So later btrfs_trim_fs() will only be
able to trim block groups in range [0, super->total_bytes).

While for btrfs, any bytenr aligned to sectorsize is valid, since btrfs
uses its logical address space, there is nothing limiting the location
where we put block groups.

For filesystem with frequent balance, it's quite easy to relocate all
block groups and bytenr of block groups will start beyond
super->total_bytes.

In that case, btrfs will not trim existing block groups.

[FIX]
Just remove the truncation in btrfs_ioctl_fitrim(), so btrfs_trim_fs()
can get the unmodified range, which is normally set to [0, U64_MAX].

Reported-by: Chris Murphy <lists@colorremedies.com>
Fixes: f4c697e6406d ("btrfs: return EINVAL if start > total_bytes in fitrim ioctl")
CC: <stable@vger.kernel.org> # v4.4+
Signed-off-by: Qu Wenruo <wqu@suse.com>
Reviewed-by: Nikolay Borisov <nborisov@suse.com>
Reviewed-by: David Sterba <dsterba@suse.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/btrfs/extent-tree.c | 10 +---------
 fs/btrfs/ioctl.c       | 11 +++++++----
 2 files changed, 8 insertions(+), 13 deletions(-)

(limited to 'fs')

diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 80cd28456f08..13ff0fdae03e 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -10708,17 +10708,9 @@ int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range)
 	u64 start;
 	u64 end;
 	u64 trimmed = 0;
-	u64 total_bytes = btrfs_super_total_bytes(fs_info->super_copy);
 	int ret = 0;
 
-	/*
-	 * try to trim all FS space, our block group may start from non-zero.
-	 */
-	if (range->len == total_bytes)
-		cache = btrfs_lookup_first_block_group(fs_info, range->start);
-	else
-		cache = btrfs_lookup_block_group(fs_info, range->start);
-
+	cache = btrfs_lookup_first_block_group(fs_info, range->start);
 	while (cache) {
 		if (cache->key.objectid >= (range->start + range->len)) {
 			btrfs_put_block_group(cache);
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 150d3c891815..3379490ce54d 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -378,7 +378,6 @@ static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg)
 	struct fstrim_range range;
 	u64 minlen = ULLONG_MAX;
 	u64 num_devices = 0;
-	u64 total_bytes = btrfs_super_total_bytes(fs_info->super_copy);
 	int ret;
 
 	if (!capable(CAP_SYS_ADMIN))
@@ -402,11 +401,15 @@ static noinline int btrfs_ioctl_fitrim(struct file *file, void __user *arg)
 		return -EOPNOTSUPP;
 	if (copy_from_user(&range, arg, sizeof(range)))
 		return -EFAULT;
-	if (range.start > total_bytes ||
-	    range.len < fs_info->sb->s_blocksize)
+
+	/*
+	 * NOTE: Don't truncate the range using super->total_bytes.  Bytenr of
+	 * block group is in the logical address space, which can be any
+	 * sectorsize aligned bytenr in  the range [0, U64_MAX].
+	 */
+	if (range.len < fs_info->sb->s_blocksize)
 		return -EINVAL;
 
-	range.len = min(range.len, total_bytes - range.start);
 	range.minlen = max(range.minlen, minlen);
 	ret = btrfs_trim_fs(fs_info->tree_root, &range);
 	if (ret < 0)
-- 
cgit v1.2.3


From 3658ccbbac39cc634e357ee08ff46d0893cbc111 Mon Sep 17 00:00:00 2001
From: Salvatore Mesoraca <s.mesoraca16@gmail.com>
Date: Thu, 23 Aug 2018 17:00:35 -0700
Subject: namei: allow restricted O_CREAT of FIFOs and regular files

commit 30aba6656f61ed44cba445a3c0d38b296fa9e8f5 upstream.

Disallows open of FIFOs or regular files not owned by the user in world
writable sticky directories, unless the owner is the same as that of the
directory or the file is opened without the O_CREAT flag.  The purpose
is to make data spoofing attacks harder.  This protection can be turned
on and off separately for FIFOs and regular files via sysctl, just like
the symlinks/hardlinks protection.  This patch is based on Openwall's
"HARDEN_FIFO" feature by Solar Designer.

This is a brief list of old vulnerabilities that could have been prevented
by this feature, some of them even allow for privilege escalation:

CVE-2000-1134
CVE-2007-3852
CVE-2008-0525
CVE-2009-0416
CVE-2011-4834
CVE-2015-1838
CVE-2015-7442
CVE-2016-7489

This list is not meant to be complete.  It's difficult to track down all
vulnerabilities of this kind because they were often reported without any
mention of this particular attack vector.  In fact, before
hardlinks/symlinks restrictions, fifos/regular files weren't the favorite
vehicle to exploit them.

[s.mesoraca16@gmail.com: fix bug reported by Dan Carpenter]
  Link: https://lkml.kernel.org/r/20180426081456.GA7060@mwanda
  Link: http://lkml.kernel.org/r/1524829819-11275-1-git-send-email-s.mesoraca16@gmail.com
[keescook@chromium.org: drop pr_warn_ratelimited() in favor of audit changes in the future]
[keescook@chromium.org: adjust commit subjet]
Link: http://lkml.kernel.org/r/20180416175918.GA13494@beast
Signed-off-by: Salvatore Mesoraca <s.mesoraca16@gmail.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Suggested-by: Solar Designer <solar@openwall.com>
Suggested-by: Kees Cook <keescook@chromium.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Loic <hackurx@opensec.fr>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 fs/namei.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 50 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/namei.c b/fs/namei.c
index de57dd59d95f..40049d61ef37 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -869,6 +869,8 @@ static inline void put_link(struct nameidata *nd)
 
 int sysctl_protected_symlinks __read_mostly = 0;
 int sysctl_protected_hardlinks __read_mostly = 0;
+int sysctl_protected_fifos __read_mostly;
+int sysctl_protected_regular __read_mostly;
 
 /**
  * may_follow_link - Check symlink following for unsafe situations
@@ -982,6 +984,45 @@ static int may_linkat(struct path *link)
 	return -EPERM;
 }
 
+/**
+ * may_create_in_sticky - Check whether an O_CREAT open in a sticky directory
+ *			  should be allowed, or not, on files that already
+ *			  exist.
+ * @dir: the sticky parent directory
+ * @inode: the inode of the file to open
+ *
+ * Block an O_CREAT open of a FIFO (or a regular file) when:
+ *   - sysctl_protected_fifos (or sysctl_protected_regular) is enabled
+ *   - the file already exists
+ *   - we are in a sticky directory
+ *   - we don't own the file
+ *   - the owner of the directory doesn't own the file
+ *   - the directory is world writable
+ * If the sysctl_protected_fifos (or sysctl_protected_regular) is set to 2
+ * the directory doesn't have to be world writable: being group writable will
+ * be enough.
+ *
+ * Returns 0 if the open is allowed, -ve on error.
+ */
+static int may_create_in_sticky(struct dentry * const dir,
+				struct inode * const inode)
+{
+	if ((!sysctl_protected_fifos && S_ISFIFO(inode->i_mode)) ||
+	    (!sysctl_protected_regular && S_ISREG(inode->i_mode)) ||
+	    likely(!(dir->d_inode->i_mode & S_ISVTX)) ||
+	    uid_eq(inode->i_uid, dir->d_inode->i_uid) ||
+	    uid_eq(current_fsuid(), inode->i_uid))
+		return 0;
+
+	if (likely(dir->d_inode->i_mode & 0002) ||
+	    (dir->d_inode->i_mode & 0020 &&
+	     ((sysctl_protected_fifos >= 2 && S_ISFIFO(inode->i_mode)) ||
+	      (sysctl_protected_regular >= 2 && S_ISREG(inode->i_mode))))) {
+		return -EACCES;
+	}
+	return 0;
+}
+
 static __always_inline
 const char *get_link(struct nameidata *nd)
 {
@@ -3166,9 +3207,15 @@ finish_open:
 		error = -ELOOP;
 		goto out;
 	}
-	error = -EISDIR;
-	if ((open_flag & O_CREAT) && d_is_dir(nd->path.dentry))
-		goto out;
+	if (open_flag & O_CREAT) {
+		error = -EISDIR;
+		if (d_is_dir(nd->path.dentry))
+			goto out;
+		error = may_create_in_sticky(dir,
+					     d_backing_inode(nd->path.dentry));
+		if (unlikely(error))
+			goto out;
+	}
 	error = -ENOTDIR;
 	if ((nd->flags & LOOKUP_DIRECTORY) && !d_can_lookup(nd->path.dentry))
 		goto out;
-- 
cgit v1.2.3