[PATCH OpenHarmony-5.10 00/16] sync 5.10 bugfix

From: Yu Changchun <yuchangchun1@huawei.com> Aili Yao (1): mm,hwpoison: return -EHWPOISON to denote that the page has already been poisoned Arnd Bergmann (2): asm-generic: fix ffs -Wshadow warning seqlock: avoid -Wshadow warnings Guoqing Jiang (1): md: revert io stats accounting Leah Rumancik (1): ext4: wipe ext4_dir_entry2 upon file deletion Li Huafei (2): perf env: Normalize aarch64.* and arm64.* to arm64 in normalize_arch() perf annotate: Add error log in symbol__annotate() Liang Wang (1): lib: use PFN_PHYS() in devmem_is_allowed() Paul E. McKenney (1): srcu: Take early exit on memory-allocation failure Russell King (1): ARM: footbridge: remove personal server platform Seunghun Han (2): acpi: acpica: fix acpi operand cache leak in dsutils.c acpi: acpica: fix acpi parse and parseext cache leaks Tony Luck (1): mm/memory-failure: use a mutex to avoid memory_failure() races Yang Yingliang (2): cgroup: disable kernel memory accounting for all memory cgroups by default hfs: fix null-ptr-deref in hfs_find_init() Yu Kuai (1): blk-mq: clear active_queues before clearing BLK_MQ_F_TAG_QUEUE_SHARED .../admin-guide/cgroup-v1/memory.rst | 6 +- .../admin-guide/kernel-parameters.txt | 1 + arch/arm/configs/footbridge_defconfig | 1 - arch/arm/mach-footbridge/Kconfig | 21 ------- arch/arm/mach-footbridge/Makefile | 2 - arch/arm/mach-footbridge/personal-pci.c | 57 ------------------- arch/arm/mach-footbridge/personal.c | 25 -------- arch/arm/mm/mmap.c | 2 +- block/blk-mq.c | 6 +- drivers/acpi/acpica/dsutils.c | 9 ++- drivers/acpi/acpica/psobject.c | 53 ++++++----------- drivers/md/md.c | 45 --------------- drivers/md/md.h | 1 - fs/ext4/namei.c | 24 +++++++- fs/hfs/bfind.c | 2 + include/asm-generic/bitops/builtin-ffs.h | 5 +- include/linux/seqlock.h | 14 ++--- kernel/rcu/srcutree.c | 4 +- mm/memcontrol.c | 6 +- mm/memory-failure.c | 40 ++++++++----- tools/perf/util/annotate.c | 4 +- tools/perf/util/env.c | 2 +- 22 files changed, 101 insertions(+), 229 deletions(-) delete mode 100644 arch/arm/mach-footbridge/personal-pci.c delete mode 100644 arch/arm/mach-footbridge/personal.c -- 2.25.1

From: Yang Yingliang <yangyingliang@huawei.com> ohos inclusion category: bugfix issue: #I4LRGQ CVE: NA ---------------------------------------- The kernel memory accounting for all memory cgroups is not stable, and it will cause a 100% regression in hackbench compared with kernel-4.19, so disable it by default. We can use the following command line to enable or disable it: cgroup.memory=kmem or cgroup.memory=kmem. Signed-off-by: Yang Yingliang <yangyingliang@huawei.com> Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com> Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com> Signed-off-by: Yu Changchun <yuchangchun1@huawei.com> --- Documentation/admin-guide/cgroup-v1/memory.rst | 6 +++--- Documentation/admin-guide/kernel-parameters.txt | 1 + mm/memcontrol.c | 6 ++++-- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/Documentation/admin-guide/cgroup-v1/memory.rst b/Documentation/admin-guide/cgroup-v1/memory.rst index 12757e63b26c..fd913ae64ea8 100644 --- a/Documentation/admin-guide/cgroup-v1/memory.rst +++ b/Documentation/admin-guide/cgroup-v1/memory.rst @@ -308,9 +308,9 @@ the amount of kernel memory used by the system. Kernel memory is fundamentally different than user memory, since it can't be swapped out, which makes it possible to DoS the system by consuming too much of this precious resource. -Kernel memory accounting is enabled for all memory cgroups by default. But -it can be disabled system-wide by passing cgroup.memory=nokmem to the kernel -at boot time. In this case, kernel memory will not be accounted at all. +Kernel memory accounting is disabled for all memory cgroups by default. But +it can be enabled system-wide by passing cgroup.memory=kmem to the kernel +at boot time. In this case, kernel memory will all be accounted. Kernel memory limits are not imposed for the root cgroup. Usage for the root cgroup may or may not be accounted. The memory used is accumulated into diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index f103667d3727..4c0a0ca7542e 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -517,6 +517,7 @@ Format: <string> nosocket -- Disable socket memory accounting. nokmem -- Disable kernel memory accounting. + kmem -- Enable kernel memory accounting. checkreqprot [SELINUX] Set initial checkreqprot flag value. Format: { "0" | "1" } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 92bf987d0a41..00156aebbcad 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -79,8 +79,8 @@ DEFINE_PER_CPU(struct mem_cgroup *, int_active_memcg); /* Socket memory accounting disabled? */ static bool cgroup_memory_nosocket; -/* Kernel memory accounting disabled? */ -static bool cgroup_memory_nokmem; +/* Kernel memory accounting disabled */ +static bool cgroup_memory_nokmem = true; /* Whether the swap controller is active */ #ifdef CONFIG_MEMCG_SWAP @@ -7134,6 +7134,8 @@ static int __init cgroup_memory(char *s) cgroup_memory_nosocket = true; if (!strcmp(token, "nokmem")) cgroup_memory_nokmem = true; + else if (!strcmp(token, "kmem")) + cgroup_memory_nokmem = false; } return 0; } -- 2.25.1

From: Li Huafei <lihuafei1@huawei.com> maillist inclusion category: bugfix issue: #I4LRGQ CVE: NA Reference: https://lkml.org/lkml/2021/7/26/599 -------------------------------- On my aarch64 big endian machine, the perf annotate does not work. # perf annotate Percent | Source code & Disassembly of [kernel.kallsyms] for cycles (253 samples, percent: local period) -------------------------------------------------------------------------------------------------------------- Percent | Source code & Disassembly of [kernel.kallsyms] for cycles (1 samples, percent: local period) ------------------------------------------------------------------------------------------------------------ Percent | Source code & Disassembly of [kernel.kallsyms] for cycles (47 samples, percent: local period) ------------------------------------------------------------------------------------------------------------- ... This is because the arch_find() function uses the normalized architecture name provided by normalize_arch(), and my machine's architecture name aarch64_be is not normalized to arm64. Like other architectures such as arm and powerpc, we can fuzzy match the architecture names associated with aarch64.* and normalize them. It seems that there is also arm64_be architecture name, which we also normalize to arm64. Signed-off-by: Li Huafei <lihuafei1@huawei.com> Reviewed-by: James Clark <james.clark@arm.com> Signed-off-by: Li Huafei <lihuafei1@huawei.com> Reviewed-by: Kuohai Xu <xukuohai@huawei.com> Signed-off-by: Chen Jun <chenjun102@huawei.com> Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com> Signed-off-by: Yu Changchun <yuchangchun1@huawei.com> --- tools/perf/util/env.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index f0dceb527ca3..d5fd6bddaa6d 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -328,7 +328,7 @@ static const char *normalize_arch(char *arch) return "x86"; if (!strcmp(arch, "sun4u") || !strncmp(arch, "sparc", 5)) return "sparc"; - if (!strcmp(arch, "aarch64") || !strcmp(arch, "arm64")) + if (!strncmp(arch, "aarch64", 7) || !strncmp(arch, "arm64", 5)) return "arm64"; if (!strncmp(arch, "arm", 3) || !strcmp(arch, "sa110")) return "arm"; -- 2.25.1

From: Li Huafei <lihuafei1@huawei.com> maillist inclusion category: bugfix issue: #I4LRGQ Reference: https://lkml.org/lkml/2021/7/26/406 -------------------------------- When users use the perf annotate feature on unsupported machines, error logs should be printed for user feedback. Signed-off-by: Li Huafei <lihuafei1@huawei.com> Reviewed-by: James Clark <james.clark@arm.com> Signed-off-by: Li Huafei <lihuafei1@huawei.com> Reviewed-by: Kuohai Xu <xukuohai@huawei.com> Signed-off-by: Chen Jun <chenjun102@huawei.com> Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com> Signed-off-by: Yu Changchun <yuchangchun1@huawei.com> --- tools/perf/util/annotate.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 308189454788..4aaaf23b4878 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -2178,8 +2178,10 @@ int symbol__annotate(struct map_symbol *ms, struct evsel *evsel, return errno; args.arch = arch = arch__find(arch_name); - if (arch == NULL) + if (arch == NULL) { + pr_err("%s: unsupported arch %s\n", __func__, arch_name); return ENOTSUP; + } if (parch) *parch = arch; -- 2.25.1

From: Tony Luck <tony.luck@intel.com> mainline inclusion from mainline-v5.13 commit 171936ddaf97e6f4e1264f4128bb5cf15691339c category: bugfix issue: #I4LRGQ Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i... ------------------------------------------------- Patch series "mm,hwpoison: fix sending SIGBUS for Action Required MCE", v5. I wrote this patchset to materialize what I think is the current allowable solution mentioned by the previous discussion [1]. I simply borrowed Tony's mutex patch and Aili's return code patch, then I queued another one to find error virtual address in the best effort manner. I know that this is not a perfect solution, but should work for some typical case. [1]: https://lore.kernel.org/linux-mm/20210331192540.2141052f@alex-virtual-machin... This patch (of 2): There can be races when multiple CPUs consume poison from the same page. The first into memory_failure() atomically sets the HWPoison page flag and begins hunting for tasks that map this page. Eventually it invalidates those mappings and may send a SIGBUS to the affected tasks. But while all that work is going on, other CPUs see a "success" return code from memory_failure() and so they believe the error has been handled and continue executing. Fix by wrapping most of the internal parts of memory_failure() in a mutex. [akpm@linux-foundation.org: make mf_mutex local to memory_failure()] Link: https://lkml.kernel.org/r/20210521030156.2612074-1-nao.horiguchi@gmail.com Link: https://lkml.kernel.org/r/20210521030156.2612074-2-nao.horiguchi@gmail.com Signed-off-by: Tony Luck <tony.luck@intel.com> Signed-off-by: Naoya Horiguchi <naoya.horiguchi@nec.com> Reviewed-by: Borislav Petkov <bp@suse.de> Reviewed-by: Oscar Salvador <osalvador@suse.de> Cc: Aili Yao <yaoaili@kingsoft.com> Cc: Andy Lutomirski <luto@kernel.org> Cc: Borislav Petkov <bp@alien8.de> Cc: David Hildenbrand <david@redhat.com> Cc: Jue Wang <juew@google.com> Cc: <stable@vger.kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Conflicts: mm/memory-failure.c Signed-off-by: Nanyong Sun <sunnanyong@huawei.com> Reviewed-by: Chen Wandun <chenwandun@huawei.com> Signed-off-by: Chen Jun <chenjun102@huawei.com> Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com> Signed-off-by: Yu Changchun <yuchangchun1@huawei.com> --- mm/memory-failure.c | 37 +++++++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index d00d1b2e97da..8df59bb777a7 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1329,8 +1329,9 @@ int memory_failure(unsigned long pfn, int flags) struct page *hpage; struct page *orig_head; struct dev_pagemap *pgmap; - int res; + int res = 0; unsigned long page_flags; + static DEFINE_MUTEX(mf_mutex); if (!sysctl_memory_failure_recovery) panic("Memory failure on page %lx", pfn); @@ -1348,12 +1349,17 @@ int memory_failure(unsigned long pfn, int flags) return -ENXIO; } - if (PageHuge(p)) - return memory_failure_hugetlb(pfn, flags); + mutex_lock(&mf_mutex); + + if (PageHuge(p)) { + res = memory_failure_hugetlb(pfn, flags); + goto unlock_mutex; + } + if (TestSetPageHWPoison(p)) { pr_err("Memory failure: %#lx: already hardware poisoned\n", pfn); - return 0; + goto unlock_mutex; } orig_head = hpage = compound_head(p); @@ -1373,17 +1379,18 @@ int memory_failure(unsigned long pfn, int flags) if (!(flags & MF_COUNT_INCREASED) && !get_hwpoison_page(p)) { if (is_free_buddy_page(p)) { action_result(pfn, MF_MSG_BUDDY, MF_DELAYED); - return 0; } else { action_result(pfn, MF_MSG_KERNEL_HIGH_ORDER, MF_IGNORED); - return -EBUSY; + res = -EBUSY; } + goto unlock_mutex; } if (PageTransHuge(hpage)) { if (try_to_split_thp_page(p, "Memory Failure") < 0) { action_result(pfn, MF_MSG_UNSPLIT_THP, MF_IGNORED); - return -EBUSY; + res = -EBUSY; + goto unlock_mutex; } VM_BUG_ON_PAGE(!page_count(p), p); } @@ -1403,7 +1410,7 @@ int memory_failure(unsigned long pfn, int flags) action_result(pfn, MF_MSG_BUDDY, MF_DELAYED); else action_result(pfn, MF_MSG_BUDDY_2ND, MF_DELAYED); - return 0; + goto unlock_mutex; } lock_page(p); @@ -1415,7 +1422,7 @@ int memory_failure(unsigned long pfn, int flags) if (PageCompound(p) && compound_head(p) != orig_head) { action_result(pfn, MF_MSG_DIFFERENT_COMPOUND, MF_IGNORED); res = -EBUSY; - goto out; + goto unlock_page; } /* @@ -1435,14 +1442,14 @@ int memory_failure(unsigned long pfn, int flags) num_poisoned_pages_dec(); unlock_page(p); put_page(p); - return 0; + goto unlock_mutex; } if (hwpoison_filter(p)) { if (TestClearPageHWPoison(p)) num_poisoned_pages_dec(); unlock_page(p); put_page(p); - return 0; + goto unlock_mutex; } /* @@ -1466,7 +1473,7 @@ int memory_failure(unsigned long pfn, int flags) if (!hwpoison_user_mappings(p, pfn, flags, &p)) { action_result(pfn, MF_MSG_UNMAP_FAILED, MF_IGNORED); res = -EBUSY; - goto out; + goto unlock_page; } /* @@ -1475,13 +1482,15 @@ int memory_failure(unsigned long pfn, int flags) if (PageLRU(p) && !PageSwapCache(p) && p->mapping == NULL) { action_result(pfn, MF_MSG_TRUNCATED_LRU, MF_IGNORED); res = -EBUSY; - goto out; + goto unlock_page; } identify_page_state: res = identify_page_state(pfn, p, page_flags); -out: +unlock_page: unlock_page(p); +unlock_mutex: + mutex_unlock(&mf_mutex); return res; } EXPORT_SYMBOL_GPL(memory_failure); -- 2.25.1

From: Aili Yao <yaoaili@kingsoft.com> mainline inclusion from mainline-v5.13 commit 47af12bae17f99b5e77f8651cb7f3e1877610acf category: bugfix issue: #I4LRGQ Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i... ------------------------------------------------- When memory_failure() is called with MF_ACTION_REQUIRED on the page that has already been hwpoisoned, memory_failure() could fail to send SIGBUS to the affected process, which results in infinite loop of MCEs. Currently memory_failure() returns 0 if it's called for already hwpoisoned page, then the caller, kill_me_maybe(), could return without sending SIGBUS to current process. An action required MCE is raised when the current process accesses to the broken memory, so no SIGBUS means that the current process continues to run and access to the error page again soon, so running into MCE loop. This issue can arise for example in the following scenarios: - Two or more threads access to the poisoned page concurrently. If local MCE is enabled, MCE handler independently handles the MCE events. So there's a race among MCE events, and the second or latter threads fall into the situation in question. - If there was a precedent memory error event and memory_failure() for the event failed to unmap the error page for some reason, the subsequent memory access to the error page triggers the MCE loop situation. To fix the issue, make memory_failure() return an error code when the error page has already been hwpoisoned. This allows memory error handler to control how it sends signals to userspace. And make sure that any process touching a hwpoisoned page should get a SIGBUS even in "already hwpoisoned" path of memory_failure() as is done in page fault path. Link: https://lkml.kernel.org/r/20210521030156.2612074-3-nao.horiguchi@gmail.com Signed-off-by: Aili Yao <yaoaili@kingsoft.com> Signed-off-by: Naoya Horiguchi <naoya.horiguchi@nec.com> Reviewed-by: Oscar Salvador <osalvador@suse.de> Cc: Andy Lutomirski <luto@kernel.org> Cc: Borislav Petkov <bp@alien8.de> Cc: Borislav Petkov <bp@suse.de> Cc: David Hildenbrand <david@redhat.com> Cc: Jue Wang <juew@google.com> Cc: Tony Luck <tony.luck@intel.com> Cc: <stable@vger.kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Nanyong Sun <sunnanyong@huawei.com> Reviewed-by: Chen Wandun <chenwandun@huawei.com> Signed-off-by: Chen Jun <chenjun102@huawei.com> Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com> Signed-off-by: Yu Changchun <yuchangchun1@huawei.com> --- mm/memory-failure.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 8df59bb777a7..c222d5a35338 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -1157,7 +1157,7 @@ static int memory_failure_hugetlb(unsigned long pfn, int flags) if (TestSetPageHWPoison(head)) { pr_err("Memory failure: %#lx: already hardware poisoned\n", pfn); - return 0; + return -EHWPOISON; } num_poisoned_pages_inc(); @@ -1359,6 +1359,7 @@ int memory_failure(unsigned long pfn, int flags) if (TestSetPageHWPoison(p)) { pr_err("Memory failure: %#lx: already hardware poisoned\n", pfn); + res = -EHWPOISON; goto unlock_mutex; } -- 2.25.1

From: Arnd Bergmann <arnd@arndb.de> mainline inclusion from mainline-v5.11 commit 6f6573a4044adefbd07f1bd951a2041150e888d7 category: bugfix issue: #I4LRGQ Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i... ------------------------------------------------- gcc -Wshadow warns about the ffs() definition that has the same name as the global ffs() built-in: include/asm-generic/bitops/builtin-ffs.h:13:28: warning: declaration of 'ffs' shadows a built-in function [-Wshadow] This is annoying because 'make W=2' warns every time this header gets included. Change it to use a #define instead, making callers directly reference the builtin. Signed-off-by: Arnd Bergmann <arnd@arndb.de> Signed-off-by: Zhang Jianhua <chris.zjh@huawei.com> Reviewed-by: He Ying <heying24@huawei.com> Signed-off-by: Chen Jun <chenjun102@huawei.com> Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com> Signed-off-by: Yu Changchun <yuchangchun1@huawei.com> --- include/asm-generic/bitops/builtin-ffs.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/include/asm-generic/bitops/builtin-ffs.h b/include/asm-generic/bitops/builtin-ffs.h index 458c85ebcd15..1dacfdb4247e 100644 --- a/include/asm-generic/bitops/builtin-ffs.h +++ b/include/asm-generic/bitops/builtin-ffs.h @@ -10,9 +10,6 @@ * the libc and compiler builtin ffs routines, therefore * differs in spirit from the above ffz (man ffs). */ -static __always_inline int ffs(int x) -{ - return __builtin_ffs(x); -} +#define ffs(x) __builtin_ffs(x) #endif -- 2.25.1

From: Arnd Bergmann <arnd@arndb.de> mainline inclusion from mainline-v5.11 commit a07c45312f06e288417049208c344ad76074627d category: bugfix issue: #I4LRGQ CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i... ------------------------------------------------- When building with W=2, there is a flood of warnings about the seqlock macros shadowing local variables: 19806 linux/seqlock.h:331:11: warning: declaration of 'seq' shadows a previous local [-Wshadow] 48 linux/seqlock.h:348:11: warning: declaration of 'seq' shadows a previous local [-Wshadow] 8 linux/seqlock.h:379:11: warning: declaration of 'seq' shadows a previous local [-Wshadow] Prefix the local variables to make the warning useful elsewhere again. Fixes: 52ac39e5db51 ("seqlock: seqcount_t: Implement all read APIs as statement expressions") Signed-off-by: Arnd Bergmann <arnd@arndb.de> Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org> Link: https://lkml.kernel.org/r/20201026165044.3722931-1-arnd@kernel.org Signed-off-by: Zhang Jianhua <chris.zjh@huawei.com> Reviewed-by: He Ying <heying24@huawei.com> Signed-off-by: Chen Jun <chenjun102@huawei.com> Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com> Signed-off-by: Yu Changchun <yuchangchun1@huawei.com> --- include/linux/seqlock.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 1ac20d75b061..fb89b05066f4 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -328,13 +328,13 @@ SEQCOUNT_LOCKNAME(ww_mutex, struct ww_mutex, true, &s->lock->base, ww_mu */ #define __read_seqcount_begin(s) \ ({ \ - unsigned seq; \ + unsigned __seq; \ \ - while ((seq = __seqcount_sequence(s)) & 1) \ + while ((__seq = __seqcount_sequence(s)) & 1) \ cpu_relax(); \ \ kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX); \ - seq; \ + __seq; \ }) /** @@ -345,10 +345,10 @@ SEQCOUNT_LOCKNAME(ww_mutex, struct ww_mutex, true, &s->lock->base, ww_mu */ #define raw_read_seqcount_begin(s) \ ({ \ - unsigned seq = __read_seqcount_begin(s); \ + unsigned _seq = __read_seqcount_begin(s); \ \ smp_rmb(); \ - seq; \ + _seq; \ }) /** @@ -376,11 +376,11 @@ SEQCOUNT_LOCKNAME(ww_mutex, struct ww_mutex, true, &s->lock->base, ww_mu */ #define raw_read_seqcount(s) \ ({ \ - unsigned seq = __seqcount_sequence(s); \ + unsigned __seq = __seqcount_sequence(s); \ \ smp_rmb(); \ kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX); \ - seq; \ + __seq; \ }) /** -- 2.25.1

From: Liang Wang <wangliang101@huawei.com> maillist inclusion category: bugfix issue: #I4LRGQ CVE: NA Reference: https://lore.kernel.org/stable/20210731025057.78825-1-wangliang101@huawei.co... -------------------------------- The physical address may exceed 32 bits on 32-bit systems with more than 32 bits of physcial address,use PFN_PHYS() in devmem_is_allowed(), or the physical address may overflow and be truncated. We found this bug when mapping a high addresses through devmem tool, when CONFIG_STRICT_DEVMEM is enabled on the ARM with ARM_LPAE and devmem is used to map a high address that is not in the iomem address range, an unexpected error indicating no permission is returned. This bug was initially introduced from v2.6.37, and the function was moved to lib when v5.11. Link: https://lkml.kernel.org/r/20210731025057.78825-1-wangliang101@huawei.com Fixes: 087aaffcdf9c ("ARM: implement CONFIG_STRICT_DEVMEM by disabling access to RAM via /dev/mem") Fixes: 527701eda5f1 ("lib: Add a generic version of devmem_is_allowed()") Signed-off-by: Liang Wang <wangliang101@huawei.com> Reviewed-by: Luis Chamberlain <mcgrof@kernel.org> Cc: Palmer Dabbelt <palmerdabbelt@google.com> Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Cc: Russell King <linux@armlinux.org.uk> Cc: Liang Wang <wangliang101@huawei.com> Cc: Xiaoming Ni <nixiaoming@huawei.com> Cc: Kefeng Wang <wangkefeng.wang@huawei.com> Cc: <stable@vger.kernel.org> [2.6.37+] Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au> [KF: fix devmem_is_allowed() on ARM] Signed-off-by: Kefeng Wang <wangkefeng.wang@huawei.com> Reviewed-by: Tong Tiangen <tongtiangen@huawei.com> Signed-off-by: Chen Jun <chenjun102@huawei.com> Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com> Signed-off-by: Yu Changchun <yuchangchun1@huawei.com> --- arch/arm/mm/mmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c index b8d912ac9e61..c64124fb34ea 100644 --- a/arch/arm/mm/mmap.c +++ b/arch/arm/mm/mmap.c @@ -179,7 +179,7 @@ int valid_mmap_phys_addr_range(unsigned long pfn, size_t size) */ int devmem_is_allowed(unsigned long pfn) { - if (iomem_is_exclusive(pfn << PAGE_SHIFT)) + if (iomem_is_exclusive(PFN_PHYS(pfn))) return 0; if (!page_is_ram(pfn)) return 1; -- 2.25.1

From: Yu Kuai <yukuai3@huawei.com> mainline inclusion from mainline-5.14-rc6 commit 454bb6775202d94f0f489c4632efecdb62d3c904 category: bugfix issue: #I4LRGQ CVE: NA Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i... ------------------------------------------------- We run a test that delete and recover devcies frequently(two devices on the same host), and we found that 'active_queues' is super big after a period of time. If device a and device b share a tag set, and a is deleted, then blk_mq_exit_queue() will clear BLK_MQ_F_TAG_QUEUE_SHARED because there is only one queue that are using the tag set. However, if b is still active, the active_queues of b might never be cleared even if b is deleted. Thus clear active_queues before BLK_MQ_F_TAG_QUEUE_SHARED is cleared. Signed-off-by: Yu Kuai <yukuai3@huawei.com> Reviewed-by: Ming Lei <ming.lei@redhat.com> Link: https://lore.kernel.org/r/20210731062130.1533893-1-yukuai3@huawei.com Signed-off-by: Jens Axboe <axboe@kernel.dk> Reviewed-by: Jason Yan <yanaijie@huawei.com> Signed-off-by: Chen Jun <chenjun102@huawei.com> Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com> Signed-off-by: Yu Changchun <yuchangchun1@huawei.com> --- block/blk-mq.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 0116f6e928ae..b0e7ec85a41c 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2958,10 +2958,12 @@ static void queue_set_hctx_shared(struct request_queue *q, bool shared) int i; queue_for_each_hw_ctx(q, hctx, i) { - if (shared) + if (shared) { hctx->flags |= BLK_MQ_F_TAG_QUEUE_SHARED; - else + } else { + blk_mq_tag_idle(hctx); hctx->flags &= ~BLK_MQ_F_TAG_QUEUE_SHARED; + } } } -- 2.25.1

From: "Paul E. McKenney" <paulmck@kernel.org> mainline inclusion from mainline-v5.11-rc1 commit 50edb988534c621a56ca103c0c16ac59e7399f01 category: bugfix issue: #I4LRGQ CVE: NA ------------------------------------------------------------------------- It turns out that init_srcu_struct() can be invoked from usermode tasks, and that fatal signals received by these tasks can cause memory-allocation failures. These failures are not handled well by init_srcu_struct(), so much so that NULL pointer dereferences can result. This commit therefore causes init_srcu_struct() to take an early exit upon detection of memory-allocation failure. Link: https://lore.kernel.org/lkml/20200908144306.33355-1-aik@ozlabs.ru/ Reported-by: Alexey Kardashevskiy <aik@ozlabs.ru> Tested-by: Alexey Kardashevskiy <aik@ozlabs.ru> Signed-off-by: Paul E. McKenney <paulmck@kernel.org> Signed-off-by: Zhen Lei <thunder.leizhen@huawei.com> Reviewed-by: Xie XiuQi <xiexiuqi@huawei.com> Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com> Signed-off-by: Yu Changchun <yuchangchun1@huawei.com> --- kernel/rcu/srcutree.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index b8821665c435..1ca734767b9d 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -180,11 +180,13 @@ static int init_srcu_struct_fields(struct srcu_struct *ssp, bool is_static) INIT_DELAYED_WORK(&ssp->work, process_srcu); if (!is_static) ssp->sda = alloc_percpu(struct srcu_data); + if (!ssp->sda) + return -ENOMEM; init_srcu_struct_nodes(ssp, is_static); ssp->srcu_gp_seq_needed_exp = 0; ssp->srcu_last_gp_end = ktime_get_mono_fast_ns(); smp_store_release(&ssp->srcu_gp_seq_needed, 0); /* Init done. */ - return ssp->sda ? 0 : -ENOMEM; + return 0; } #ifdef CONFIG_DEBUG_LOCK_ALLOC -- 2.25.1

From: Yang Yingliang <yangyingliang@huawei.com> ohos inclusion category: bugfix issue #I4LRGQ CVE: CVE-2018-12928 --------------------------------------------- It will cause a null-ptr-deref in hfs_find_init() in fuzz test. [ 107.092729] hfs: continuing without an alternate MDB [ 107.097632] general protection fault, probably for non-canonical address 0xdffffc0000000008: 0000 [#1] SMP KASAN PTI [ 107.104679] KASAN: null-ptr-deref in range [0x0000000000000040-0x0000000000000047] [ 107.109100] CPU: 0 PID: 379 Comm: hfs_inject Not tainted 5.7.0-rc7-00001-g24627f5f2973 #897 [ 107.114142] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 [ 107.121095] RIP: 0010:hfs_find_init+0x72/0x170 [ 107.123609] Code: c1 ea 03 80 3c 02 00 0f 85 e6 00 00 00 4c 8d 65 40 48 c7 43 18 00 00 00 00 48 b8 00 00 00 00 00 fc ff df 4c 89 e2 48 c1 ea 03 <0f> b6 04 02 84 c0 74 08 3c 03 0f 8e a5 00 00 00 8b 45 40 be c0 0c [ 107.134660] RSP: 0018:ffff88810291f3f8 EFLAGS: 00010202 [ 107.137897] RAX: dffffc0000000000 RBX: ffff88810291f468 RCX: 1ffff110175cdf05 [ 107.141874] RDX: 0000000000000008 RSI: ffff88810291f468 RDI: ffff88810291f480 [ 107.145844] RBP: 0000000000000000 R08: 0000000000000000 R09: ffffed1020381013 [ 107.149431] R10: ffff88810291f500 R11: ffffed1020381012 R12: 0000000000000040 [ 107.152315] R13: 0000000000000000 R14: ffff888101c0814a R15: ffff88810291f468 [ 107.155464] FS: 00000000009ea880(0000) GS:ffff88810c600000(0000) knlGS:0000000000000000 [ 107.159795] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 107.162987] CR2: 00005605a19dd284 CR3: 0000000103a0c006 CR4: 0000000000020ef0 [ 107.166665] Call Trace: [ 107.167969] ? find_held_lock+0x33/0x1c0 [ 107.169972] hfs_ext_read_extent+0x16b/0xb00 [ 107.172092] ? create_page_buffers+0x14e/0x1b0 [ 107.174303] ? hfs_free_extents+0x280/0x280 [ 107.176437] ? lock_downgrade+0x730/0x730 [ 107.178272] hfs_get_block+0x496/0x8a0 [ 107.179972] block_read_full_page+0x241/0x8d0 [ 107.181971] ? hfs_extend_file+0xae0/0xae0 [ 107.183814] ? end_buffer_async_read_io+0x10/0x10 [ 107.185954] ? add_to_page_cache_lru+0x13f/0x1f0 [ 107.188006] ? add_to_page_cache_locked+0x10/0x10 [ 107.190175] do_read_cache_page+0xc6a/0x1180 [ 107.192096] ? generic_file_read_iter+0x4c0/0x4c0 [ 107.194234] ? hfs_btree_open+0x408/0x1000 [ 107.196068] ? lock_downgrade+0x730/0x730 [ 107.197926] ? wake_bit_function+0x180/0x180 [ 107.199845] ? lockdep_init_map_waits+0x267/0x7c0 [ 107.201895] hfs_btree_open+0x455/0x1000 [ 107.203479] hfs_mdb_get+0x122c/0x1ae8 [ 107.205065] ? hfs_mdb_put+0x350/0x350 [ 107.206590] ? queue_work_node+0x260/0x260 [ 107.208309] ? rcu_read_lock_sched_held+0xa1/0xd0 [ 107.210227] ? lockdep_init_map_waits+0x267/0x7c0 [ 107.212144] ? lockdep_init_map_waits+0x267/0x7c0 [ 107.213979] hfs_fill_super+0x9ba/0x1280 [ 107.215444] ? bdev_name.isra.9+0xf1/0x2b0 [ 107.217028] ? hfs_remount+0x190/0x190 [ 107.218428] ? pointer+0x5da/0x710 [ 107.219745] ? file_dentry_name+0xf0/0xf0 [ 107.221262] ? mount_bdev+0xd1/0x330 [ 107.222592] ? vsnprintf+0x7bd/0x1250 [ 107.224007] ? pointer+0x710/0x710 [ 107.225332] ? down_write+0xe5/0x160 [ 107.226698] ? hfs_remount+0x190/0x190 [ 107.228120] ? snprintf+0x91/0xc0 [ 107.229388] ? vsprintf+0x10/0x10 [ 107.230628] ? sget+0x3af/0x4a0 [ 107.231848] ? hfs_remount+0x190/0x190 [ 107.233300] mount_bdev+0x26e/0x330 [ 107.234611] ? hfs_statfs+0x540/0x540 [ 107.236015] legacy_get_tree+0x101/0x1f0 [ 107.237431] ? security_capable+0x58/0x90 [ 107.238832] vfs_get_tree+0x89/0x2d0 [ 107.240082] ? ns_capable_common+0x5c/0xd0 [ 107.241521] do_mount+0xd8a/0x1720 [ 107.242727] ? lock_downgrade+0x730/0x730 [ 107.244116] ? copy_mount_string+0x20/0x20 [ 107.245557] ? _copy_from_user+0xbe/0x100 [ 107.246967] ? memdup_user+0x47/0x70 [ 107.248212] __x64_sys_mount+0x162/0x1b0 [ 107.249537] do_syscall_64+0xa5/0x4f0 [ 107.250742] entry_SYSCALL_64_after_hwframe+0x49/0xb3 [ 107.252369] RIP: 0033:0x44e8ea [ 107.253360] Code: 48 c7 c1 c0 ff ff ff f7 d8 64 89 01 48 83 c8 ff c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 44 00 00 49 89 ca b8 a5 00 00 00 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 c7 c1 c0 ff ff ff f7 d8 64 89 01 48 [ 107.259240] RSP: 002b:00007ffd910e4c28 EFLAGS: 00000207 ORIG_RAX: 00000000000000a5 [ 107.261668] RAX: ffffffffffffffda RBX: 0000000000400400 RCX: 000000000044e8ea [ 107.263920] RDX: 000000000049321e RSI: 0000000000493222 RDI: 00007ffd910e4d00 [ 107.266177] RBP: 00007ffd910e5d10 R08: 0000000000000000 R09: 000000000000000a [ 107.268451] R10: 0000000000000001 R11: 0000000000000207 R12: 0000000000401c40 [ 107.270721] R13: 0000000000000000 R14: 00000000006ba018 R15: 0000000000000000 [ 107.273025] Modules linked in: [ 107.274029] Dumping ftrace buffer: [ 107.275121] (ftrace buffer empty) [ 107.276370] ---[ end trace c5e0b9d684f3570e ]--- We need check tree in hfs_find_init(). https://lore.kernel.org/linux-fsdevel/20180419024358.GA5215@bombadil.infrade... https://marc.info/?l=linux-fsdevel&m=152406881024567&w=2 References: CVE-2018-12928 Signed-off-by: Yang Yingliang <yangyingliang@huawei.com> Reviewed-by: Jason Yan <yanaijie@huawei.com> Signed-off-by: Yang Yingliang <yangyingliang@huawei.com> Signed-off-by: Chen Jun <chenjun102@huawei.com> Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com> Signed-off-by: Yu Changchun <yuchangchun1@huawei.com> --- fs/hfs/bfind.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/fs/hfs/bfind.c b/fs/hfs/bfind.c index ef9498a6e88a..ae44c8fa86ec 100644 --- a/fs/hfs/bfind.c +++ b/fs/hfs/bfind.c @@ -16,6 +16,8 @@ int hfs_find_init(struct hfs_btree *tree, struct hfs_find_data *fd) { void *ptr; + if (!tree) + return -EINVAL; fd->tree = tree; fd->bnode = NULL; ptr = kmalloc(tree->max_key_len * 2 + 4, GFP_KERNEL); -- 2.25.1

From: Russell King <rmk+kernel@armlinux.org.uk> mainline inclusion from mainline-5.13-rc1 commit 298a58e165e447ccfaae35fe9f651f9d7e15166f issue: #I4LRGQ CVE: CVE-2021-32078 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=... -------------------------------- Remove the personal server platform, as that has had an array overrun issue identified. It is believed that no one is using this code. Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk> Conflicts: arch/arm/mach-footbridge/personal-pci.c Signed-off-by: Chen Jun <chenjun102@huawei.com> Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com> Signed-off-by: Yu Changchun <yuchangchun1@huawei.com> --- arch/arm/configs/footbridge_defconfig | 1 - arch/arm/mach-footbridge/Kconfig | 21 --------- arch/arm/mach-footbridge/Makefile | 2 - arch/arm/mach-footbridge/personal-pci.c | 57 ------------------------- arch/arm/mach-footbridge/personal.c | 25 ----------- 5 files changed, 106 deletions(-) delete mode 100644 arch/arm/mach-footbridge/personal-pci.c delete mode 100644 arch/arm/mach-footbridge/personal.c diff --git a/arch/arm/configs/footbridge_defconfig b/arch/arm/configs/footbridge_defconfig index 3a7938f244e5..2aa3ebeb89d7 100644 --- a/arch/arm/configs/footbridge_defconfig +++ b/arch/arm/configs/footbridge_defconfig @@ -7,7 +7,6 @@ CONFIG_EXPERT=y CONFIG_MODULES=y CONFIG_ARCH_FOOTBRIDGE=y CONFIG_ARCH_CATS=y -CONFIG_ARCH_PERSONAL_SERVER=y CONFIG_ARCH_EBSA285_HOST=y CONFIG_ARCH_NETWINDER=y CONFIG_LEDS=y diff --git a/arch/arm/mach-footbridge/Kconfig b/arch/arm/mach-footbridge/Kconfig index 844aa585b966..728aff93fba9 100644 --- a/arch/arm/mach-footbridge/Kconfig +++ b/arch/arm/mach-footbridge/Kconfig @@ -16,27 +16,6 @@ config ARCH_CATS Saying N will reduce the size of the Footbridge kernel. -config ARCH_PERSONAL_SERVER - bool "Compaq Personal Server" - select FOOTBRIDGE_HOST - select ISA - select ISA_DMA - select FORCE_PCI - help - Say Y here if you intend to run this kernel on the Compaq - Personal Server. - - Saying N will reduce the size of the Footbridge kernel. - - The Compaq Personal Server is not available for purchase. - There are no product plans beyond the current research - prototypes at this time. Information is available at: - - <http://www.crl.hpl.hp.com/projects/personalserver/> - - If you have any questions or comments about the Compaq Personal - Server, send e-mail to <skiff@crl.dec.com>. - config ARCH_EBSA285_ADDIN bool "EBSA285 (addin mode)" select ARCH_EBSA285 diff --git a/arch/arm/mach-footbridge/Makefile b/arch/arm/mach-footbridge/Makefile index a09f1041f141..6262993c0555 100644 --- a/arch/arm/mach-footbridge/Makefile +++ b/arch/arm/mach-footbridge/Makefile @@ -11,12 +11,10 @@ pci-y += dc21285.o pci-$(CONFIG_ARCH_CATS) += cats-pci.o pci-$(CONFIG_ARCH_EBSA285_HOST) += ebsa285-pci.o pci-$(CONFIG_ARCH_NETWINDER) += netwinder-pci.o -pci-$(CONFIG_ARCH_PERSONAL_SERVER) += personal-pci.o obj-$(CONFIG_ARCH_CATS) += cats-hw.o isa-timer.o obj-$(CONFIG_ARCH_EBSA285) += ebsa285.o dc21285-timer.o obj-$(CONFIG_ARCH_NETWINDER) += netwinder-hw.o isa-timer.o -obj-$(CONFIG_ARCH_PERSONAL_SERVER) += personal.o dc21285-timer.o obj-$(CONFIG_PCI) +=$(pci-y) diff --git a/arch/arm/mach-footbridge/personal-pci.c b/arch/arm/mach-footbridge/personal-pci.c deleted file mode 100644 index 9d19aa98a663..000000000000 --- a/arch/arm/mach-footbridge/personal-pci.c +++ /dev/null @@ -1,57 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * linux/arch/arm/mach-footbridge/personal-pci.c - * - * PCI bios-type initialisation for PCI machines - * - * Bits taken from various places. - */ -#include <linux/kernel.h> -#include <linux/pci.h> -#include <linux/init.h> - -#include <asm/irq.h> -#include <asm/mach/pci.h> -#include <asm/mach-types.h> - -static int irqmap_personal_server[] = { - IRQ_IN0, IRQ_IN1, IRQ_IN2, IRQ_IN3, 0, 0, 0, - IRQ_DOORBELLHOST, IRQ_DMA1, IRQ_DMA2, IRQ_PCI -}; - -static int personal_server_map_irq(const struct pci_dev *dev, u8 slot, u8 pin) -{ - unsigned char line; - - pci_read_config_byte(dev, PCI_INTERRUPT_LINE, &line); - - if (line > 0x40 && line <= 0x5f) { - /* line corresponds to the bit controlling this interrupt - * in the footbridge. Ignore the first 8 interrupt bits, - * look up the rest in the map. IN0 is bit number 8 - */ - return irqmap_personal_server[(line & 0x1f) - 8]; - } else if (line == 0) { - /* no interrupt */ - return 0; - } else - return irqmap_personal_server[(line - 1) & 3]; -} - -static struct hw_pci personal_server_pci __initdata = { - .map_irq = personal_server_map_irq, - .nr_controllers = 1, - .ops = &dc21285_ops, - .setup = dc21285_setup, - .preinit = dc21285_preinit, - .postinit = dc21285_postinit, -}; - -static int __init personal_pci_init(void) -{ - if (machine_is_personal_server()) - pci_common_init(&personal_server_pci); - return 0; -} - -subsys_initcall(personal_pci_init); diff --git a/arch/arm/mach-footbridge/personal.c b/arch/arm/mach-footbridge/personal.c deleted file mode 100644 index ca715754fc00..000000000000 --- a/arch/arm/mach-footbridge/personal.c +++ /dev/null @@ -1,25 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * linux/arch/arm/mach-footbridge/personal.c - * - * Personal server (Skiff) machine fixup - */ -#include <linux/init.h> -#include <linux/spinlock.h> - -#include <asm/hardware/dec21285.h> -#include <asm/mach-types.h> - -#include <asm/mach/arch.h> - -#include "common.h" - -MACHINE_START(PERSONAL_SERVER, "Compaq-PersonalServer") - /* Maintainer: Jamey Hicks / George France */ - .atag_offset = 0x100, - .map_io = footbridge_map_io, - .init_irq = footbridge_init_irq, - .init_time = footbridge_timer_init, - .restart = footbridge_restart, -MACHINE_END - -- 2.25.1

From: Leah Rumancik <leah.rumancik@gmail.com> mainline inclusion from mainline-5.13-rc1 commit 6c0912739699d8e4b6a87086401bf3ad3c59502d category: bugfix issue: #I4LRGQ Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i... ------------------------------------------------- Upon file deletion, zero out all fields in ext4_dir_entry2 besides rec_len. In case sensitive data is stored in filenames, this ensures no potentially sensitive data is left in the directory entry upon deletion. Also, wipe these fields upon moving a directory entry during the conversion to an htree and when splitting htree nodes. The data wiped may still exist in the journal, but there are future commits planned to address this. Signed-off-by: Leah Rumancik <leah.rumancik@gmail.com> Link: https://lore.kernel.org/r/20210422180834.2242353-1-leah.rumancik@gmail.com Signed-off-by: Theodore Ts'o <tytso@mit.edu> Signed-off-by: Baokun Li <libaokun1@huawei.com> Reviewed-by: Zhang Yi <yi.zhang@huawei.com> Signed-off-by: Chen Jun <chenjun102@huawei.com> Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com> Signed-off-by: Yu Changchun <yuchangchun1@huawei.com> --- fs/ext4/namei.c | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 4b97cca90c67..526960e34386 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1776,7 +1776,14 @@ dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count, memcpy (to, de, rec_len); ((struct ext4_dir_entry_2 *) to)->rec_len = ext4_rec_len_to_disk(rec_len, blocksize); + + /* wipe dir_entry excluding the rec_len field */ de->inode = 0; + memset(&de->name_len, 0, ext4_rec_len_from_disk(de->rec_len, + blocksize) - + offsetof(struct ext4_dir_entry_2, + name_len)); + map++; to += rec_len; } @@ -2101,6 +2108,7 @@ static int make_indexed_dir(handle_t *handle, struct ext4_filename *fname, data2 = bh2->b_data; memcpy(data2, de, len); + memset(de, 0, len); /* wipe old data */ de = (struct ext4_dir_entry_2 *) data2; top = data2 + len; while ((char *)(de2 = ext4_next_entry(de, blocksize)) < top) @@ -2481,15 +2489,27 @@ int ext4_generic_delete_entry(struct inode *dir, entry_buf, buf_size, i)) return -EFSCORRUPTED; if (de == de_del) { - if (pde) + if (pde) { pde->rec_len = ext4_rec_len_to_disk( ext4_rec_len_from_disk(pde->rec_len, blocksize) + ext4_rec_len_from_disk(de->rec_len, blocksize), blocksize); - else + + /* wipe entire dir_entry */ + memset(de, 0, ext4_rec_len_from_disk(de->rec_len, + blocksize)); + } else { + /* wipe dir_entry excluding the rec_len field */ de->inode = 0; + memset(&de->name_len, 0, + ext4_rec_len_from_disk(de->rec_len, + blocksize) - + offsetof(struct ext4_dir_entry_2, + name_len)); + } + inode_inc_iversion(dir); return 0; } -- 2.25.1

From: Seunghun Han <kkamagui@gmail.com> maillist inclusion category: bugfix issue: #I4LRGQ CVE: CVE-2017-13693 Reference: https://lore.kernel.org/all/1503551495-33286-1-git-send-email-kkamagui@gmail... ------------------------------------------------- I found an ACPI cache leak in ACPI early termination and boot continuing case. When early termination is occurred due to malicious ACPI table, Linux kernel terminates ACPI function and continues to boot process. While kernel terminates ACPI function, kmem_cache_destroy() reports Acpi-Operand cache leak. Boot log of ACPI operand cache leak is as follows:
[ 0.585957] ACPI: Added _OSI(Module Device) [ 0.587218] ACPI: Added _OSI(Processor Device) [ 0.588530] ACPI: Added _OSI(3.0 _SCP Extensions) [ 0.589790] ACPI: Added _OSI(Processor Aggregator Device) [ 0.591534] ACPI Error: Illegal I/O port address/length above 64K: C806E00000004002/0x2 (20170303/hwvalid-155) [ 0.594351] ACPI Exception: AE_LIMIT, Unable to initialize fixed events (20170303/evevent-88) [ 0.597858] ACPI: Unable to start the ACPI Interpreter [ 0.599162] ACPI Error: Could not remove SCI handler (20170303/evmisc-281) [ 0.601836] kmem_cache_destroy Acpi-Operand: Slab cache still has objects [ 0.603556] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.12.0-rc5 #26 [ 0.605159] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 0.609177] Call Trace: [ 0.610063] ? dump_stack+0x5c/0x81 [ 0.611118] ? kmem_cache_destroy+0x1aa/0x1c0 [ 0.612632] ? acpi_sleep_proc_init+0x27/0x27 [ 0.613906] ? acpi_os_delete_cache+0xa/0x10 [ 0.617986] ? acpi_ut_delete_caches+0x3f/0x7b [ 0.619293] ? acpi_terminate+0xa/0x14 [ 0.620394] ? acpi_init+0x2af/0x34f [ 0.621616] ? __class_create+0x4c/0x80 [ 0.623412] ? video_setup+0x7f/0x7f [ 0.624585] ? acpi_sleep_proc_init+0x27/0x27 [ 0.625861] ? do_one_initcall+0x4e/0x1a0 [ 0.627513] ? kernel_init_freeable+0x19e/0x21f [ 0.628972] ? rest_init+0x80/0x80 [ 0.630043] ? kernel_init+0xa/0x100 [ 0.631084] ? ret_from_fork+0x25/0x30 [ 0.633343] vgaarb: loaded [ 0.635036] EDAC MC: Ver: 3.0.0 [ 0.638601] PCI: Probing PCI hardware [ 0.639833] PCI host bridge to bus 0000:00 [ 0.641031] pci_bus 0000:00: root bus resource [io 0x0000-0xffff] ... Continue to boot and log is omitted ...
I analyzed this memory leak in detail and found acpi_ds_obj_stack_pop_and_ delete() function miscalculated the top of the stack. acpi_ds_obj_stack_push() function uses walk_state->operand_index for start position of the top, but acpi_ds_obj_stack_pop_and_delete() function considers index 0 for it. Therefore, this causes acpi operand memory leak. This cache leak causes a security threat because an old kernel (<= 4.9) shows memory locations of kernel functions in stack dump. Some malicious users could use this information to neutralize kernel ASLR. I made a patch to fix ACPI operand cache leak. Signed-off-by: Seunghun Han <kkamagui@gmail.com> Signed-off-by: Yang Yingliang <yangyingliang@huawei.com> Reviewed-by: Hanjun Guo <guohanjun@huawei.com> Signed-off-by: Yang Yingliang <yangyingliang@huawei.com> Signed-off-by: Chen Jun <chenjun102@huawei.com> Reviewed-by: Xiu Jianfeng <xiujianfeng@huawei.com> Reviewed-by: Hanjun Guo <guohanjun@huawei.com> Signed-off-by: Chen Jun <chenjun102@huawei.com> Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com> Signed-off-by: Yu Changchun <yuchangchun1@huawei.com> --- drivers/acpi/acpica/dsutils.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/acpi/acpica/dsutils.c b/drivers/acpi/acpica/dsutils.c index fb9ed5e1da89..2bdae8a25e08 100644 --- a/drivers/acpi/acpica/dsutils.c +++ b/drivers/acpi/acpica/dsutils.c @@ -668,6 +668,8 @@ acpi_ds_create_operands(struct acpi_walk_state *walk_state, union acpi_parse_object *arguments[ACPI_OBJ_NUM_OPERANDS]; u32 arg_count = 0; u32 index = walk_state->num_operands; + u32 prev_num_operands = walk_state->num_operands; + u32 new_num_operands; u32 i; ACPI_FUNCTION_TRACE_PTR(ds_create_operands, first_arg); @@ -696,6 +698,7 @@ acpi_ds_create_operands(struct acpi_walk_state *walk_state, /* Create the interpreter arguments, in reverse order */ + new_num_operands = index; index--; for (i = 0; i < arg_count; i++) { arg = arguments[index]; @@ -720,7 +723,11 @@ acpi_ds_create_operands(struct acpi_walk_state *walk_state, * pop everything off of the operand stack and delete those * objects */ - acpi_ds_obj_stack_pop_and_delete(arg_count, walk_state); + walk_state->num_operands = i; + acpi_ds_obj_stack_pop_and_delete(new_num_operands, walk_state); + + /* Restore operand count */ + walk_state->num_operands = prev_num_operands; ACPI_EXCEPTION((AE_INFO, status, "While creating Arg %u", index)); return_ACPI_STATUS(status); -- 2.25.1

From: Seunghun Han <kkamagui@gmail.com> maillist inclusion category: bugfix issue: #I4LRGQ CVE: CVE-2017-13694 Reference: https://lore.kernel.org/all/1498210597-112293-1-git-send-email-kkamagui@gmai... ------------------------------------------------- I'm Seunghun Han, and I work for National Security Research Institute of South Korea. I have been doing a research on ACPI and found an ACPI cache leak in ACPI early abort cases. Boot log of ACPI cache leak is as follows: [ 0.352414] ACPI: Added _OSI(Module Device) [ 0.353182] ACPI: Added _OSI(Processor Device) [ 0.353182] ACPI: Added _OSI(3.0 _SCP Extensions) [ 0.353182] ACPI: Added _OSI(Processor Aggregator Device) [ 0.356028] ACPI: Unable to start the ACPI Interpreter [ 0.356799] ACPI Error: Could not remove SCI handler (20170303/evmisc-281) [ 0.360215] kmem_cache_destroy Acpi-State: Slab cache still has objects [ 0.360648] CPU: 0 PID: 1 Comm: swapper/0 Tainted: G W 4.12.0-rc4-next-20170608+ #10 [ 0.361273] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 0.361873] Call Trace: [ 0.362243] ? dump_stack+0x5c/0x81 [ 0.362591] ? kmem_cache_destroy+0x1aa/0x1c0 [ 0.362944] ? acpi_sleep_proc_init+0x27/0x27 [ 0.363296] ? acpi_os_delete_cache+0xa/0x10 [ 0.363646] ? acpi_ut_delete_caches+0x6d/0x7b [ 0.364000] ? acpi_terminate+0xa/0x14 [ 0.364000] ? acpi_init+0x2af/0x34f [ 0.364000] ? __class_create+0x4c/0x80 [ 0.364000] ? video_setup+0x7f/0x7f [ 0.364000] ? acpi_sleep_proc_init+0x27/0x27 [ 0.364000] ? do_one_initcall+0x4e/0x1a0 [ 0.364000] ? kernel_init_freeable+0x189/0x20a [ 0.364000] ? rest_init+0xc0/0xc0 [ 0.364000] ? kernel_init+0xa/0x100 [ 0.364000] ? ret_from_fork+0x25/0x30 I analyzed this memory leak in detail. I found that “Acpi-State” cache and “Acpi-Parse” cache were merged because the size of cache objects was same slab cache size. I finally found “Acpi-Parse” cache and “Acpi-ParseExt” cache were leaked using SLAB_NEVER_MERGE flag in kmem_cache_create() function. Real ACPI cache leak point is as follows: [ 0.360101] ACPI: Added _OSI(Module Device) [ 0.360101] ACPI: Added _OSI(Processor Device) [ 0.360101] ACPI: Added _OSI(3.0 _SCP Extensions) [ 0.361043] ACPI: Added _OSI(Processor Aggregator Device) [ 0.364016] ACPI: Unable to start the ACPI Interpreter [ 0.365061] ACPI Error: Could not remove SCI handler (20170303/evmisc-281) [ 0.368174] kmem_cache_destroy Acpi-Parse: Slab cache still has objects [ 0.369332] CPU: 1 PID: 1 Comm: swapper/0 Tainted: G W 4.12.0-rc4-next-20170608+ #8 [ 0.371256] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 0.372000] Call Trace: [ 0.372000] ? dump_stack+0x5c/0x81 [ 0.372000] ? kmem_cache_destroy+0x1aa/0x1c0 [ 0.372000] ? acpi_sleep_proc_init+0x27/0x27 [ 0.372000] ? acpi_os_delete_cache+0xa/0x10 [ 0.372000] ? acpi_ut_delete_caches+0x56/0x7b [ 0.372000] ? acpi_terminate+0xa/0x14 [ 0.372000] ? acpi_init+0x2af/0x34f [ 0.372000] ? __class_create+0x4c/0x80 [ 0.372000] ? video_setup+0x7f/0x7f [ 0.372000] ? acpi_sleep_proc_init+0x27/0x27 [ 0.372000] ? do_one_initcall+0x4e/0x1a0 [ 0.372000] ? kernel_init_freeable+0x189/0x20a [ 0.372000] ? rest_init+0xc0/0xc0 [ 0.372000] ? kernel_init+0xa/0x100 [ 0.372000] ? ret_from_fork+0x25/0x30 [ 0.388039] kmem_cache_destroy Acpi-ParseExt: Slab cache still has objects [ 0.389063] CPU: 1 PID: 1 Comm: swapper/0 Tainted: G W 4.12.0-rc4-next-20170608+ #8 [ 0.390557] Hardware name: innotek GmbH VirtualBox/VirtualBox, BIOS VirtualBox 12/01/2006 [ 0.392000] Call Trace: [ 0.392000] ? dump_stack+0x5c/0x81 [ 0.392000] ? kmem_cache_destroy+0x1aa/0x1c0 [ 0.392000] ? acpi_sleep_proc_init+0x27/0x27 [ 0.392000] ? acpi_os_delete_cache+0xa/0x10 [ 0.392000] ? acpi_ut_delete_caches+0x6d/0x7b [ 0.392000] ? acpi_terminate+0xa/0x14 [ 0.392000] ? acpi_init+0x2af/0x34f [ 0.392000] ? __class_create+0x4c/0x80 [ 0.392000] ? video_setup+0x7f/0x7f [ 0.392000] ? acpi_sleep_proc_init+0x27/0x27 [ 0.392000] ? do_one_initcall+0x4e/0x1a0 [ 0.392000] ? kernel_init_freeable+0x189/0x20a [ 0.392000] ? rest_init+0xc0/0xc0 [ 0.392000] ? kernel_init+0xa/0x100 [ 0.392000] ? ret_from_fork+0x25/0x30 When early abort is occurred due to invalid ACPI information, Linux kernel terminates ACPI by calling acpi_terminate() function. The function calls acpi_ut_delete_caches() function to delete local caches (acpi_gbl_namespace_ cache, state_cache, operand_cache, ps_node_cache, ps_node_ext_cache). But the deletion codes in acpi_ut_delete_caches() function only delete slab caches using kmem_cache_destroy() function, therefore the cache objects should be flushed before acpi_ut_delete_caches() function. “Acpi-Parse” cache and “Acpi-ParseExt” cache are used in an AML parse function, acpi_ps_parse_loop(). The function should have flush codes to handle an error state due to invalid AML codes. This cache leak has a security threat because an old kernel (<= 4.9) shows memory locations of kernel functions in stack dump. Some malicious users could use this information to neutralize kernel ASLR. To fix ACPI cache leak for enhancing security, I made a patch which has flush codes in acpi_ps_parse_loop() function. I hope that this patch improves the security of Linux kernel. Thank you. Signed-off-by: Seunghun Han <kkamagui@gmail.com> Signed-off-by: Yang Yingliang <yangyingliang@huawei.com> Reviewed-by: Hanjun Guo <guohanjun@huawei.com> Signed-off-by: Chen Jun <chenjun102@huawei.com> Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com> Signed-off-by: Yu Changchun <yuchangchun1@huawei.com> --- drivers/acpi/acpica/psobject.c | 53 ++++++++++------------------------ 1 file changed, 16 insertions(+), 37 deletions(-) diff --git a/drivers/acpi/acpica/psobject.c b/drivers/acpi/acpica/psobject.c index 2480c26c5171..b6c87898bbd8 100644 --- a/drivers/acpi/acpica/psobject.c +++ b/drivers/acpi/acpica/psobject.c @@ -636,7 +636,8 @@ acpi_status acpi_ps_complete_final_op(struct acpi_walk_state *walk_state, union acpi_parse_object *op, acpi_status status) { - acpi_status status2; + acpi_status return_status = AE_OK; + u8 ascending = TRUE; ACPI_FUNCTION_TRACE_PTR(ps_complete_final_op, walk_state); @@ -650,7 +651,8 @@ acpi_ps_complete_final_op(struct acpi_walk_state *walk_state, op)); do { if (op) { - if (walk_state->ascending_callback != NULL) { + if (ascending && + walk_state->ascending_callback != NULL) { walk_state->op = op; walk_state->op_info = acpi_ps_get_opcode_info(op->common. @@ -672,49 +674,26 @@ acpi_ps_complete_final_op(struct acpi_walk_state *walk_state, } if (status == AE_CTRL_TERMINATE) { - status = AE_OK; - - /* Clean up */ - do { - if (op) { - status2 = - acpi_ps_complete_this_op - (walk_state, op); - if (ACPI_FAILURE - (status2)) { - return_ACPI_STATUS - (status2); - } - } - - acpi_ps_pop_scope(& - (walk_state-> - parser_state), - &op, - &walk_state-> - arg_types, - &walk_state-> - arg_count); - - } while (op); - - return_ACPI_STATUS(status); + ascending = FALSE; + return_status = AE_CTRL_TERMINATE; } else if (ACPI_FAILURE(status)) { /* First error is most important */ - (void) - acpi_ps_complete_this_op(walk_state, - op); - return_ACPI_STATUS(status); + ascending = FALSE; + return_status = status; } } - status2 = acpi_ps_complete_this_op(walk_state, op); - if (ACPI_FAILURE(status2)) { - return_ACPI_STATUS(status2); + status = acpi_ps_complete_this_op(walk_state, op); + if (ACPI_FAILURE(status)) { + ascending = FALSE; + if (ACPI_SUCCESS(return_status) || + return_status == AE_CTRL_TERMINATE) { + return_status = status; + } } } @@ -724,5 +703,5 @@ acpi_ps_complete_final_op(struct acpi_walk_state *walk_state, } while (op); - return_ACPI_STATUS(status); + return_ACPI_STATUS(return_status); } -- 2.25.1

From: Guoqing Jiang <jgq516@gmail.com> mainline inclusion from mainline-v5.14-rc1 commit ad3fc798800fb7ca04c1dfc439dba946818048d8 category: bugfix issue: #I4LRGQ Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i... ------------------------------------------------- The commit 41d2d848e5c0 ("md: improve io stats accounting") could cause double fault problem per the report [1], and also it is not correct to change ->bi_end_io if md don't own it, so let's revert it. And io stats accounting will be replemented in later commits. [1]. https://lore.kernel.org/linux-raid/3bf04253-3fad-434a-63a7-20214e38cf26@gmai... Fixes: 41d2d848e5c0 ("md: improve io stats accounting") Signed-off-by: Guoqing Jiang <jiangguoqing@kylinos.cn> Signed-off-by: Song Liu <song@kernel.org> Signed-off-by: Luo Meng <luomeng12@huawei.com> Conflicts: drivers/md/md.c Reviewed-by: Jason Yan <yanaijie@huawei.com> Signed-off-by: Chen Jun <chenjun102@huawei.com> Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com> Signed-off-by: Yu Changchun <yuchangchun1@huawei.com> --- drivers/md/md.c | 45 --------------------------------------------- drivers/md/md.h | 1 - 2 files changed, 46 deletions(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index f16f190546ef..75c6a82fa9aa 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -459,30 +459,6 @@ void md_handle_request(struct mddev *mddev, struct bio *bio) } EXPORT_SYMBOL(md_handle_request); -struct md_io { - struct mddev *mddev; - bio_end_io_t *orig_bi_end_io; - void *orig_bi_private; - unsigned long start_time; - struct hd_struct *part; -}; - -static void md_end_io(struct bio *bio) -{ - struct md_io *md_io = bio->bi_private; - struct mddev *mddev = md_io->mddev; - - part_end_io_acct(md_io->part, bio, md_io->start_time); - - bio->bi_end_io = md_io->orig_bi_end_io; - bio->bi_private = md_io->orig_bi_private; - - mempool_free(md_io, &mddev->md_io_pool); - - if (bio->bi_end_io) - bio->bi_end_io(bio); -} - static blk_qc_t md_submit_bio(struct bio *bio) { const int rw = bio_data_dir(bio); @@ -507,21 +483,6 @@ static blk_qc_t md_submit_bio(struct bio *bio) return BLK_QC_T_NONE; } - if (bio->bi_end_io != md_end_io) { - struct md_io *md_io; - - md_io = mempool_alloc(&mddev->md_io_pool, GFP_NOIO); - md_io->mddev = mddev; - md_io->orig_bi_end_io = bio->bi_end_io; - md_io->orig_bi_private = bio->bi_private; - - bio->bi_end_io = md_end_io; - bio->bi_private = md_io; - - md_io->start_time = part_start_io_acct(mddev->gendisk, - &md_io->part, bio); - } - /* bio could be mergeable after passing to underlayer */ bio->bi_opf &= ~REQ_NOMERGE; @@ -5626,7 +5587,6 @@ static void md_free(struct kobject *ko) bioset_exit(&mddev->bio_set); bioset_exit(&mddev->sync_set); - mempool_exit(&mddev->md_io_pool); kfree(mddev); } @@ -5722,11 +5682,6 @@ static int md_alloc(dev_t dev, char *name) */ mddev->hold_active = UNTIL_STOP; - error = mempool_init_kmalloc_pool(&mddev->md_io_pool, BIO_POOL_SIZE, - sizeof(struct md_io)); - if (error) - goto abort; - error = -ENOMEM; mddev->queue = blk_alloc_queue(NUMA_NO_NODE); if (!mddev->queue) diff --git a/drivers/md/md.h b/drivers/md/md.h index 2175a5ac4f7c..c94811cf2600 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -487,7 +487,6 @@ struct mddev { struct bio_set sync_set; /* for sync operations like * metadata and bitmap writes */ - mempool_t md_io_pool; /* Generic flush handling. * The last to finish preflush schedules a worker to submit -- 2.25.1
participants (1)
-
yiyuchangchun@126.com