[PATCH OpenHarmony-5.10 00/15] sync OpenHarmony CVE bugfix

From: Yu Changchun <yuchangchun1@huawei.com>

These patches are related to the following CVEs:

CVE-2021-3759
CVE-2021-3669
CVE-2021-3640
CVE-2019-16089
CVE-2020-27820
CVE-2021-43975
CVE-2021-43976
CVE-2020-12363
CVE-2020-12364
CVE-2021-4037
CVE-2021-3752
CVE-2021-4001
CVE-2021-4002

------------------------------------------------------------------

Christoph Hellwig (1):
  xfs: fix up non-directory creation in SGID directories

Daniel Borkmann (1):
  bpf: Fix toctou on read-only map's constant scalar tracking

Desmond Cheong Zhi Xi (1):
  Bluetooth: switch to lock_sock in SCO

Jeremy Cline (3):
  drm/nouveau: use drm_dev_unplug() during device removal
  drm/nouveau: Add a dedicated mutex for the clients list
  drm/nouveau: clean up all clients on device removal

John Harrison (1):
  drm/i915/guc: Update to use firmware v49.0.1

Nadav Amit (1):
  hugetlbfs: flush TLBs correctly after huge_pmd_unshare

Navid Emamdoost (1):
  nbd_genl_status: null check for nla_nest_start

Rafael Aquini (1):
  ipc: replace costly bailout check in sysvipc_find_ipc()

Takashi Iwai (1):
  Bluetooth: sco: Fix lock_sock() blockage by memcpy_from_msg()

Vasily Averin (1):
  memcg: enable accounting of ipc resources

Wang ShaoBo (1):
  Bluetooth: fix use-after-free error in lock_sock_nested()

Zekun Shen (2):
  atlantic: Fix OOB read and write in hw_atl_utils_fw_rpc_wait
  mwifiex_usb: Fix skb_over_panic in mwifiex_usb_recv

 drivers/block/nbd.c                           |   6 +
 drivers/gpu/drm/i915/gt/intel_engine_cs.c     |   3 +-
 drivers/gpu/drm/i915/gt/uc/intel_guc.c        |  18 ---
 drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c    | 131 ++++++++++++++----
 drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h   |  80 ++++++-----
 drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h    |   5 +
 drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c      |  27 ++--
 drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h      |   2 +
 drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h  |   6 +-
 drivers/gpu/drm/nouveau/nouveau_drm.c         |  42 +++++-
 drivers/gpu/drm/nouveau/nouveau_drv.h         |   5 +
 .../aquantia/atlantic/hw_atl/hw_atl_utils.c   |  10 ++
 drivers/net/wireless/marvell/mwifiex/usb.c    |   3 +-
 fs/xfs/xfs_inode.c                            |  14 +-
 include/linux/bpf.h                           |   3 +-
 ipc/msg.c                                     |   2 +-
 ipc/sem.c                                     |   9 +-
 ipc/shm.c                                     |   2 +-
 ipc/util.c                                    |  16 +--
 kernel/bpf/syscall.c                          |  57 +++++---
 kernel/bpf/verifier.c                         |  17 ++-
 mm/hugetlb.c                                  |  23 ++-
 net/bluetooth/l2cap_sock.c                    |  10 +-
 net/bluetooth/sco.c                           |  42 +++---
 24 files changed, 361 insertions(+), 172 deletions(-)

--
2.25.1

From: Vasily Averin <vvs@virtuozzo.com>

mainline inclusion
from mainline
commit 18319498fdd4cdf8c1c2c48cd432863b1f915d6f
issue: #I4NRS5
CVE: CVE-2021-3759
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...

--------------------------------

When a user creates IPC objects it forces the kernel to allocate memory for
these long-living objects. It makes sense to account them to restrict the
host's memory consumption from inside the memcg-limited container.

This patch enables accounting for IPC shared memory segments, messages,
semaphores and semaphore's undo lists.

Link: https://lkml.kernel.org/r/d6507b06-4df6-78f8-6c54-3ae86e3b5339@virtuozzo.com
Signed-off-by: Vasily Averin <vvs@virtuozzo.com>
Reviewed-by: Shakeel Butt <shakeelb@google.com>
Cc: Alexander Viro <viro@zeniv.linux.org.uk>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Andrei Vagin <avagin@gmail.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Borislav Petkov <bp@suse.de>
Cc: Christian Brauner <christian.brauner@ubuntu.com>
Cc: Dmitry Safonov <0x7f454c46@gmail.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: "J. Bruce Fields" <bfields@fieldses.org>
Cc: Jeff Layton <jlayton@kernel.org>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Jiri Slaby <jirislaby@kernel.org>
Cc: Johannes Weiner <hannes@cmpxchg.org>
Cc: Kirill Tkhai <ktkhai@virtuozzo.com>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Roman Gushchin <guro@fb.com>
Cc: Serge Hallyn <serge@hallyn.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Yutian Yang <nglaive@gmail.com>
Cc: Zefan Li <lizefan.x@bytedance.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Chen Jun <chenjun102@huawei.com>

Conflicts:
	ipc/msg.c
	ipc/sem.c
	ipc/shm.c

Reviewed-by: Wang Hui <john.wanghui@huawei.com>
Signed-off-by: Chen Jun <chenjun102@huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
Signed-off-by: Yu Changchun <yuchangchun1@huawei.com>
---
 ipc/msg.c | 2 +-
 ipc/sem.c | 9 +++++----
 ipc/shm.c | 2 +-
 3 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/ipc/msg.c b/ipc/msg.c
index 6e6c8e0c9380..8ded6b8f10a2 100644
--- a/ipc/msg.c
+++ b/ipc/msg.c
@@ -147,7 +147,7 @@ static int newque(struct ipc_namespace *ns, struct ipc_params *params)
 	key_t key = params->key;
 	int msgflg = params->flg;
 
-	msq = kvmalloc(sizeof(*msq), GFP_KERNEL);
+	msq = kvmalloc(sizeof(*msq), GFP_KERNEL_ACCOUNT);
 	if (unlikely(!msq))
 		return -ENOMEM;
 
diff --git a/ipc/sem.c b/ipc/sem.c
index 7d9c06b0ad6e..d3b9b73cd9ca 100644
--- a/ipc/sem.c
+++ b/ipc/sem.c
@@ -511,7 +511,7 @@ static struct sem_array *sem_alloc(size_t nsems)
 	if (nsems > (INT_MAX - sizeof(*sma)) / sizeof(sma->sems[0]))
 		return NULL;
 
-	sma = kvzalloc(struct_size(sma, sems, nsems), GFP_KERNEL);
+	sma = kvzalloc(struct_size(sma, sems, nsems), GFP_KERNEL_ACCOUNT);
 	if (unlikely(!sma))
 		return NULL;
 
@@ -1852,7 +1852,7 @@ static inline int get_undo_list(struct sem_undo_list **undo_listp)
 
 	undo_list = current->sysvsem.undo_list;
 	if (!undo_list) {
-		undo_list = kzalloc(sizeof(*undo_list), GFP_KERNEL);
+		undo_list = kzalloc(sizeof(*undo_list), GFP_KERNEL_ACCOUNT);
 		if (undo_list == NULL)
 			return -ENOMEM;
 		spin_lock_init(&undo_list->lock);
@@ -1937,7 +1937,7 @@ static struct sem_undo *find_alloc_undo(struct ipc_namespace *ns, int semid)
 	rcu_read_unlock();
 
 	/* step 2: allocate new undo structure */
-	new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL);
+	new = kzalloc(sizeof(struct sem_undo) + sizeof(short)*nsems, GFP_KERNEL_ACCOUNT);
 	if (!new) {
 		ipc_rcu_putref(&sma->sem_perm, sem_rcu_free);
 		return ERR_PTR(-ENOMEM);
@@ -2001,7 +2001,8 @@ static long do_semtimedop(int semid, struct sembuf __user *tsops,
 	if (nsops > ns->sc_semopm)
 		return -E2BIG;
 	if (nsops > SEMOPM_FAST) {
-		sops = kvmalloc_array(nsops, sizeof(*sops), GFP_KERNEL);
+		sops = kvmalloc_array(nsops, sizeof(*sops),
+				      GFP_KERNEL_ACCOUNT);
 		if (sops == NULL)
 			return -ENOMEM;
 	}
diff --git a/ipc/shm.c b/ipc/shm.c
index e25c7c6106bc..fce0b7b12939 100644
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -619,7 +619,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)
 	    ns->shm_tot + numpages > ns->shm_ctlall)
 		return -ENOSPC;
 
-	shp = kvmalloc(sizeof(*shp), GFP_KERNEL);
+	shp = kvmalloc(sizeof(*shp), GFP_KERNEL_ACCOUNT);
 	if (unlikely(!shp))
 		return -ENOMEM;
 
--
2.25.1
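
A note on the allocation flag used throughout this patch: GFP_KERNEL_ACCOUNT
is simply GFP_KERNEL | __GFP_ACCOUNT, which charges the allocation to the
current task's memory cgroup so a container cannot pin unbounded host memory
via long-living IPC objects. A minimal sketch of the pattern (hypothetical
structure and function names, not code from this series):

	#include <linux/mm.h>
	#include <linux/slab.h>

	struct ipc_like_object {
		char payload[512];
	};

	/* Charged against the caller's memcg; kvfree() uncharges it. */
	static struct ipc_like_object *alloc_accounted(void)
	{
		/* GFP_KERNEL_ACCOUNT = GFP_KERNEL | __GFP_ACCOUNT */
		return kvmalloc(sizeof(struct ipc_like_object),
				GFP_KERNEL_ACCOUNT);
	}

	static void free_accounted(struct ipc_like_object *obj)
	{
		kvfree(obj);
	}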

From: Rafael Aquini <aquini@redhat.com>

mainline inclusion
from mainline-5.15-rc1
commit 20401d1058f3f841f35a594ac2fc1293710e55b9
issue: #I4NRS5
CVE: CVE-2021-3669
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...

--------------------------------

sysvipc_find_ipc() was left with a costly way to check if the offset
position fed to it is bigger than the total number of IPC IDs in use. So
much so that the time it takes to iterate over /proc/sysvipc/* files grows
quadratically (roughly 4x per doubling of N) for a custom benchmark that
creates "N" SYSV shm segments and then times the read of /proc/sysvipc/shm
(milliseconds):

      12 msecs to read  1024 segs from /proc/sysvipc/shm
      18 msecs to read  2048 segs from /proc/sysvipc/shm
      65 msecs to read  4096 segs from /proc/sysvipc/shm
     325 msecs to read  8192 segs from /proc/sysvipc/shm
    1303 msecs to read 16384 segs from /proc/sysvipc/shm
    5182 msecs to read 32768 segs from /proc/sysvipc/shm

The root problem lies with the loop that computes the total amount of ids
in use to check if the "pos" fed to sysvipc_find_ipc() grew bigger than
"ids->in_use": the scan is O(pos) per iteration step, and reading the whole
file repeats it for every position. That is a quite inefficient way to get
to the maximum index in the id lookup table, especially when that value is
already provided by struct ipc_ids.max_idx.

This patch follows up on the optimization introduced via commit
15df03c879836 ("sysvipc: make get_maxid O(1) again") and gets rid of the
aforementioned costly loop, replacing it with a simpler check based on the
ipc_get_maxidx() returned value, which allows for a smooth linear increase
in time complexity for the same custom benchmark:

       2 msecs to read  1024 segs from /proc/sysvipc/shm
       2 msecs to read  2048 segs from /proc/sysvipc/shm
       4 msecs to read  4096 segs from /proc/sysvipc/shm
       9 msecs to read  8192 segs from /proc/sysvipc/shm
      19 msecs to read 16384 segs from /proc/sysvipc/shm
      39 msecs to read 32768 segs from /proc/sysvipc/shm

Link: https://lkml.kernel.org/r/20210809203554.1562989-1-aquini@redhat.com
Signed-off-by: Rafael Aquini <aquini@redhat.com>
Acked-by: Davidlohr Bueso <dbueso@suse.de>
Acked-by: Manfred Spraul <manfred@colorfullife.com>
Cc: Waiman Long <llong@redhat.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Chen Jun <chenjun102@huawei.com>
Reviewed-by: Xiu Jianfeng <xiujianfeng@huawei.com>
Signed-off-by: Chen Jun <chenjun102@huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
Signed-off-by: Yu Changchun <yuchangchun1@huawei.com>
---
 ipc/util.c | 16 ++++------------
 1 file changed, 4 insertions(+), 12 deletions(-)

diff --git a/ipc/util.c b/ipc/util.c
index cfa0045e748d..cc46cfa06e04 100644
--- a/ipc/util.c
+++ b/ipc/util.c
@@ -754,21 +754,13 @@ struct pid_namespace *ipc_seq_pid_ns(struct seq_file *s)
 static struct kern_ipc_perm *sysvipc_find_ipc(struct ipc_ids *ids, loff_t pos,
 					      loff_t *new_pos)
 {
-	struct kern_ipc_perm *ipc;
-	int total, id;
-
-	total = 0;
-	for (id = 0; id < pos && total < ids->in_use; id++) {
-		ipc = idr_find(&ids->ipcs_idr, id);
-		if (ipc != NULL)
-			total++;
-	}
+	struct kern_ipc_perm *ipc = NULL;
+	int max_idx = ipc_get_maxidx(ids);
 
-	ipc = NULL;
-	if (total >= ids->in_use)
+	if (max_idx == -1 || pos > max_idx)
 		goto out;
 
-	for (; pos < ipc_mni; pos++) {
+	for (; pos <= max_idx; pos++) {
 		ipc = idr_find(&ids->ipcs_idr, pos);
 		if (ipc != NULL) {
 			rcu_read_lock();
--
2.25.1

From: Wang ShaoBo <bobo.shaobowang@huawei.com>

maillist inclusion
category: bugfix
issue: #I4NRS5
CVE: CVE-2021-3752
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...

-------------------------------------------------

A use-after-free error in lock_sock_nested is reported:

[  179.140137][ T3731] =====================================================
[  179.142675][ T3731] BUG: KMSAN: use-after-free in lock_sock_nested+0x280/0x2c0
[  179.145494][ T3731] CPU: 4 PID: 3731 Comm: kworker/4:2 Not tainted 5.12.0-rc6+ #54
[  179.148432][ T3731] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1ubuntu1.1 04/01/2014
[  179.151806][ T3731] Workqueue: events l2cap_chan_timeout
[  179.152730][ T3731] Call Trace:
[  179.153301][ T3731]  dump_stack+0x24c/0x2e0
[  179.154063][ T3731]  kmsan_report+0xfb/0x1e0
[  179.154855][ T3731]  __msan_warning+0x5c/0xa0
[  179.155579][ T3731]  lock_sock_nested+0x280/0x2c0
[  179.156436][ T3731]  ? kmsan_get_metadata+0x116/0x180
[  179.157257][ T3731]  l2cap_sock_teardown_cb+0xb8/0x890
[  179.158154][ T3731]  ? __msan_metadata_ptr_for_load_8+0x10/0x20
[  179.159141][ T3731]  ? kmsan_get_metadata+0x116/0x180
[  179.159994][ T3731]  ? kmsan_get_shadow_origin_ptr+0x84/0xb0
[  179.160959][ T3731]  ? l2cap_sock_recv_cb+0x420/0x420
[  179.161834][ T3731]  l2cap_chan_del+0x3e1/0x1d50
[  179.162608][ T3731]  ? kmsan_get_metadata+0x116/0x180
[  179.163435][ T3731]  ? kmsan_get_shadow_origin_ptr+0x84/0xb0
[  179.164406][ T3731]  l2cap_chan_close+0xeea/0x1050
[  179.165189][ T3731]  ? kmsan_internal_unpoison_shadow+0x42/0x70
[  179.166180][ T3731]  l2cap_chan_timeout+0x1da/0x590
[  179.167066][ T3731]  ? __msan_metadata_ptr_for_load_8+0x10/0x20
[  179.168023][ T3731]  ? l2cap_chan_create+0x560/0x560
[  179.168818][ T3731]  process_one_work+0x121d/0x1ff0
[  179.169598][ T3731]  worker_thread+0x121b/0x2370
[  179.170346][ T3731]  kthread+0x4ef/0x610
[  179.171010][ T3731]  ? process_one_work+0x1ff0/0x1ff0
[  179.171828][ T3731]  ? kthread_blkcg+0x110/0x110
[  179.172587][ T3731]  ret_from_fork+0x1f/0x30
[  179.173348][ T3731]
[  179.173752][ T3731] Uninit was created at:
[  179.174409][ T3731]  kmsan_internal_poison_shadow+0x5c/0xf0
[  179.175373][ T3731]  kmsan_slab_free+0x76/0xc0
[  179.176060][ T3731]  kfree+0x3a5/0x1180
[  179.176664][ T3731]  __sk_destruct+0x8af/0xb80
[  179.177375][ T3731]  __sk_free+0x812/0x8c0
[  179.178032][ T3731]  sk_free+0x97/0x130
[  179.178686][ T3731]  l2cap_sock_release+0x3d5/0x4d0
[  179.179457][ T3731]  sock_close+0x150/0x450
[  179.180117][ T3731]  __fput+0x6bd/0xf00
[  179.180787][ T3731]  ____fput+0x37/0x40
[  179.181481][ T3731]  task_work_run+0x140/0x280
[  179.182219][ T3731]  do_exit+0xe51/0x3e60
[  179.182930][ T3731]  do_group_exit+0x20e/0x450
[  179.183656][ T3731]  get_signal+0x2dfb/0x38f0
[  179.184344][ T3731]  arch_do_signal_or_restart+0xaa/0xe10
[  179.185266][ T3731]  exit_to_user_mode_prepare+0x2d2/0x560
[  179.186136][ T3731]  syscall_exit_to_user_mode+0x35/0x60
[  179.186984][ T3731]  do_syscall_64+0xc5/0x140
[  179.187681][ T3731]  entry_SYSCALL_64_after_hwframe+0x44/0xae
[  179.188604][ T3731] =====================================================

In our case, there are two threads, A and B:

  Context: Thread A               Context: Thread B
  l2cap_chan_timeout()            __se_sys_shutdown()
    l2cap_chan_close()              l2cap_sock_shutdown()
      l2cap_chan_del()                l2cap_chan_close()
        l2cap_sock_teardown_cb()        l2cap_sock_teardown_cb()

Once l2cap_sock_teardown_cb() has executed, this sock is marked as
SOCK_ZAPPED and can be treated as killable in l2cap_sock_kill() once
sock_orphan() has executed. At this point, closing the sock through
sock_close() ends up calling l2cap_sock_kill(), as in Thread C:

  Context: Thread C
  sock_close()
    l2cap_sock_release()
      sock_orphan()
      l2cap_sock_kill()    # free sock if refcnt is 1

If C has completed, then once A or B reaches l2cap_sock_teardown_cb()
again, a use-after-free occurs.

We should set chan->data to NULL when the sock is destructed, to signal
that the teardown operation is no longer allowed in
l2cap_sock_teardown_cb(), and we should also avoid killing an
already-killed socket in l2cap_sock_close_cb().

Signed-off-by: Wang ShaoBo <bobo.shaobowang@huawei.com>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Chen Jun <chenjun102@huawei.com>
Reviewed-by: weiyang wang <wangweiyang2@huawei.com>
Signed-off-by: Chen Jun <chenjun102@huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
Signed-off-by: Yu Changchun <yuchangchun1@huawei.com>
---
 net/bluetooth/l2cap_sock.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index c99d65ef13b1..160c016a5dfb 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -1508,6 +1508,9 @@ static void l2cap_sock_close_cb(struct l2cap_chan *chan)
 {
 	struct sock *sk = chan->data;
 
+	if (!sk)
+		return;
+
 	l2cap_sock_kill(sk);
 }
 
@@ -1516,6 +1519,9 @@ static void l2cap_sock_teardown_cb(struct l2cap_chan *chan, int err)
 	struct sock *sk = chan->data;
 	struct sock *parent;
 
+	if (!sk)
+		return;
+
 	BT_DBG("chan %p state %s", chan, state_to_string(chan->state));
 
 	/* This callback can be called both for server (BT_LISTEN)
@@ -1707,8 +1713,10 @@ static void l2cap_sock_destruct(struct sock *sk)
 {
 	BT_DBG("sk %p", sk);
 
-	if (l2cap_pi(sk)->chan)
+	if (l2cap_pi(sk)->chan) {
+		l2cap_pi(sk)->chan->data = NULL;
 		l2cap_chan_put(l2cap_pi(sk)->chan);
+	}
 
 	if (l2cap_pi(sk)->rx_busy_skb) {
 		kfree_skb(l2cap_pi(sk)->rx_busy_skb);
--
2.25.1
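
The shape of this fix is a common one for callback back-pointers: the owner
clears the pointer in its destructor, and every callback revalidates it
before dereferencing. A self-contained sketch of the pattern (hypothetical
names, deliberately simplified from the real l2cap code):

	struct sock_obj { int refcnt; };

	struct chan {
		void *data;		/* back-pointer to the owning sock */
	};

	/* Callback that may fire after the sock has been freed. */
	static void teardown_cb(struct chan *c)
	{
		struct sock_obj *sk = c->data;

		if (!sk)		/* owner already destructed: bail out */
			return;
		/* ... only now is it safe to touch *sk ... */
	}

	/* Destructor: break the back-pointer before dropping the channel. */
	static void sock_destruct(struct sock_obj *sk, struct chan *c)
	{
		if (c) {
			c->data = NULL;	/* late callbacks now see NULL */
			/* drop the sock's channel reference here */
		}
	}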

From: Desmond Cheong Zhi Xi <desmondcheongzx@gmail.com>

mainline inclusion
from mainline-v5.14-rc1
commit 27c24fda62b601d6f9ca5e992502578c4310876f
category: bugfix
issue: #I4NRS5
CVE: CVE-2021-3640
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...

-------------------------------------------------

Since sco_sock_timeout is now scheduled using delayed work, it is no longer
run in SOFTIRQ context. Hence bh_lock_sock is no longer necessary in SCO to
synchronise between user contexts and SOFTIRQ processing.

As such, calls to bh_lock_sock should be replaced with lock_sock to
synchronize with other concurrent processes that use lock_sock.

Signed-off-by: Desmond Cheong Zhi Xi <desmondcheongzx@gmail.com>
Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Signed-off-by: Lijun Fang <fanglijun3@huawei.com>
Reviewed-by: Xiu Jianfeng <xiujianfeng@huawei.com>
Reviewed-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: Chen Jun <chenjun102@huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
Signed-off-by: Yu Changchun <yuchangchun1@huawei.com>
---
 net/bluetooth/sco.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 7c24a9acbc45..cb2b11683082 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -93,10 +93,10 @@ static void sco_sock_timeout(struct work_struct *work)
 
 	BT_DBG("sock %p state %d", sk, sk->sk_state);
 
-	bh_lock_sock(sk);
+	lock_sock(sk);
 	sk->sk_err = ETIMEDOUT;
 	sk->sk_state_change(sk);
-	bh_unlock_sock(sk);
+	release_sock(sk);
 
 	sock_put(sk);
 }
@@ -192,10 +192,10 @@ static void sco_conn_del(struct hci_conn *hcon, int err)
 
 	if (sk) {
 		sock_hold(sk);
-		bh_lock_sock(sk);
+		lock_sock(sk);
 		sco_sock_clear_timer(sk);
 		sco_chan_del(sk, err);
-		bh_unlock_sock(sk);
+		release_sock(sk);
 		sock_put(sk);
 
 		/* Ensure no more work items will run before freeing conn. */
@@ -1101,10 +1101,10 @@ static void sco_conn_ready(struct sco_conn *conn)
 
 	if (sk) {
 		sco_sock_clear_timer(sk);
-		bh_lock_sock(sk);
+		lock_sock(sk);
 		sk->sk_state = BT_CONNECTED;
 		sk->sk_state_change(sk);
-		bh_unlock_sock(sk);
+		release_sock(sk);
 	} else {
 		sco_conn_lock(conn);
 
@@ -1119,12 +1119,12 @@ static void sco_conn_ready(struct sco_conn *conn)
 			return;
 		}
 
-		bh_lock_sock(parent);
+		lock_sock(parent);
 
 		sk = sco_sock_alloc(sock_net(parent), NULL,
 				    BTPROTO_SCO, GFP_ATOMIC, 0);
 		if (!sk) {
-			bh_unlock_sock(parent);
+			release_sock(parent);
 			sco_conn_unlock(conn);
 			return;
 		}
@@ -1145,7 +1145,7 @@ static void sco_conn_ready(struct sco_conn *conn)
 		/* Wake up parent */
 		parent->sk_data_ready(parent);
 
-		bh_unlock_sock(parent);
+		release_sock(parent);
 
 		sco_conn_unlock(conn);
 	}
--
2.25.1
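
For background, lock_sock() may sleep, so it is only legal from process
context; bh_lock_sock() is the spinlock variant for SOFTIRQ handlers. A
minimal sketch of the delayed-work timeout shape this patch relies on
(hypothetical protocol struct; the locking and workqueue calls are the real
kernel APIs):

	#include <net/sock.h>
	#include <linux/workqueue.h>

	struct my_pinfo {
		struct sock sk;			/* must be first */
		struct delayed_work timeout;
	};

	static void my_timeout_work(struct work_struct *work)
	{
		struct my_pinfo *pi = container_of(to_delayed_work(work),
						   struct my_pinfo, timeout);
		struct sock *sk = &pi->sk;

		/* Workqueue = process context, so the sleeping lock is fine. */
		lock_sock(sk);
		sk->sk_err = ETIMEDOUT;
		sk->sk_state_change(sk);
		release_sock(sk);
		sock_put(sk);		/* pairs with sock_hold() at arm time */
	}

	static void my_arm_timeout(struct my_pinfo *pi)
	{
		sock_hold(&pi->sk);
		schedule_delayed_work(&pi->timeout, msecs_to_jiffies(5000));
	}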

From: Takashi Iwai <tiwai@suse.de>

mainline inclusion
from mainline-v5.14-rc7
commit 99c23da0eed4fd20cae8243f2b51e10e66aa0951
category: bugfix
issue: #I4NRS5
CVE: CVE-2021-3640
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...

-------------------------------------------------

sco_send_frame() also takes lock_sock() around the memcpy_from_msg() call,
which may be blocked endlessly by a task using the userfaultfd technique;
this results in a hung-task watchdog trigger.

Just like the similar fix for hci_sock_sendmsg() in commit 92c685dc5de0
("Bluetooth: reorganize functions..."), this patch moves the
memcpy_from_msg() out of lock_sock() to address the hang.

This should be the last piece for fixing CVE-2021-3640 after a few already
queued fixes.

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Lijun Fang <fanglijun3@huawei.com>
Reviewed-by: Xiu Jianfeng <xiujianfeng@huawei.com>
Reviewed-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: Chen Jun <chenjun102@huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
Signed-off-by: Yu Changchun <yuchangchun1@huawei.com>
---
 net/bluetooth/sco.c | 24 ++++++++++++++++--------
 1 file changed, 16 insertions(+), 8 deletions(-)

diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index cb2b11683082..918df8d0e8b6 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -281,7 +281,8 @@ static int sco_connect(struct hci_dev *hdev, struct sock *sk)
 	return err;
 }
 
-static int sco_send_frame(struct sock *sk, struct msghdr *msg, int len)
+static int sco_send_frame(struct sock *sk, void *buf, int len,
+			  unsigned int msg_flags)
 {
 	struct sco_conn *conn = sco_pi(sk)->conn;
 	struct sk_buff *skb;
@@ -293,15 +294,11 @@ static int sco_send_frame(struct sock *sk, struct msghdr *msg, int len)
 
 	BT_DBG("sk %p len %d", sk, len);
 
-	skb = bt_skb_send_alloc(sk, len, msg->msg_flags & MSG_DONTWAIT, &err);
+	skb = bt_skb_send_alloc(sk, len, msg_flags & MSG_DONTWAIT, &err);
 	if (!skb)
 		return err;
 
-	if (memcpy_from_msg(skb_put(skb, len), msg, len)) {
-		kfree_skb(skb);
-		return -EFAULT;
-	}
-
+	memcpy(skb_put(skb, len), buf, len);
 	hci_send_sco(conn->hcon, skb);
 
 	return len;
@@ -726,6 +723,7 @@ static int sco_sock_sendmsg(struct socket *sock, struct msghdr *msg,
 			    size_t len)
 {
 	struct sock *sk = sock->sk;
+	void *buf;
 	int err;
 
 	BT_DBG("sock %p, sk %p", sock, sk);
@@ -737,14 +735,24 @@ static int sco_sock_sendmsg(struct socket *sock, struct msghdr *msg,
 	if (msg->msg_flags & MSG_OOB)
 		return -EOPNOTSUPP;
 
+	buf = kmalloc(len, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	if (memcpy_from_msg(buf, msg, len)) {
+		kfree(buf);
+		return -EFAULT;
+	}
+
 	lock_sock(sk);
 
 	if (sk->sk_state == BT_CONNECTED)
-		err = sco_send_frame(sk, msg, len);
+		err = sco_send_frame(sk, buf, len, msg->msg_flags);
 	else
 		err = -ENOTCONN;
 
 	release_sock(sk);
+	kfree(buf);
 	return err;
 }
--
2.25.1
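
The underlying rule this patch applies: never hold lock_sock() across a copy
from user memory, because the copy can fault and, via userfaultfd, block
forever. Reduced to its essentials, the fixed path looks like this
(hypothetical protocol name; the helpers are the real kernel APIs):

	#include <linux/skbuff.h>
	#include <linux/slab.h>
	#include <net/sock.h>

	static int my_proto_sendmsg(struct sock *sk, struct msghdr *msg,
				    size_t len)
	{
		void *buf;
		int err = 0;

		buf = kmalloc(len, GFP_KERNEL);
		if (!buf)
			return -ENOMEM;

		/* May fault and sleep: no sock lock held yet. */
		if (memcpy_from_msg(buf, msg, len)) {
			kfree(buf);
			return -EFAULT;
		}

		lock_sock(sk);
		/* ... queue buf for transmission under the lock ... */
		release_sock(sk);

		kfree(buf);
		return err;
	}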

From: Navid Emamdoost <navid.emamdoost@gmail.com>

maillist inclusion
category: bugfix
issue: #I4NRS5
CVE: CVE-2019-16089
Reference: https://lore.kernel.org/lkml/20190911164013.27364-1-navid.emamdoost@gmail.co...

---------------------------

nla_nest_start may fail and return NULL. The check is inserted, and the
errno is selected based on other call sites within the same source file.

Update: removed extra newline.
v3 Update: added releasing the reply; thanks to Michal Kubecek for pointing
it out.

Signed-off-by: Navid Emamdoost <navid.emamdoost@gmail.com>
Reviewed-by: Michal Kubecek <mkubecek@suse.cz>
Signed-off-by: Lijun Fang <fanglijun3@huawei.com>
Reviewed-by: Jason Yan <yanaijie@huawei.com>
Reviewed-by: Xiu Jianfeng <xiujianfeng@huawei.com>
Signed-off-by: Chen Jun <chenjun102@huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
Signed-off-by: Yu Changchun <yuchangchun1@huawei.com>
---
 drivers/block/nbd.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c
index 59c452fff835..f7f1d9dbdc80 100644
--- a/drivers/block/nbd.c
+++ b/drivers/block/nbd.c
@@ -2313,6 +2313,12 @@ static int nbd_genl_status(struct sk_buff *skb, struct genl_info *info)
 	}
 
 	dev_list = nla_nest_start_noflag(reply, NBD_ATTR_DEVICE_LIST);
+	if (!dev_list) {
+		nlmsg_free(reply);
+		ret = -EMSGSIZE;
+		goto out;
+	}
+
 	if (index == -1) {
 		ret = idr_for_each(&nbd_index_idr, &status_cb, reply);
 		if (ret) {
--
2.25.1
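
For context, nla_nest_start_noflag() returns NULL when the reply skb runs
out of tailroom, so every nest open needs a check. A minimal sketch of the
error-handling shape (hypothetical helper; the netlink calls are the real
kernel APIs):

	#include <net/netlink.h>

	static int fill_device_list(struct sk_buff *reply, int attrtype)
	{
		struct nlattr *list;

		list = nla_nest_start_noflag(reply, attrtype);
		if (!list)
			return -EMSGSIZE;	/* caller frees the reply */

		/* ... add one nested attribute set per device ... */

		nla_nest_end(reply, list);
		return 0;
	}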

From: Jeremy Cline <jcline@redhat.com>

mainline inclusion
from mainline
commit aff2299e0d81b26304ccc6a1ec0170e437f38efc
issue: #I4NRS5
CVE: CVE-2020-27820
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...

--------------------------------

Nouveau does not currently support hot-unplugging, but it still makes sense
to switch from drm_dev_unregister() to drm_dev_unplug(). drm_dev_unplug()
calls drm_dev_unregister() after marking the device as unplugged, but only
after any device critical sections are finished.

Since nouveau isn't using drm_dev_enter() and drm_dev_exit(), there are no
critical sections, so this is nearly functionally equivalent. However, the
DRM layer does check to see if the device is unplugged, and if it is,
returns appropriate error codes.

In the future nouveau can add critical sections in order to truly support
hot-unplugging.

Cc: stable@vger.kernel.org # 5.4+
Signed-off-by: Jeremy Cline <jcline@redhat.com>
Reviewed-by: Lyude Paul <lyude@redhat.com>
Reviewed-by: Ben Skeggs <bskeggs@redhat.com>
Tested-by: Karol Herbst <kherbst@redhat.com>
Signed-off-by: Karol Herbst <kherbst@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20201125202648.5220-2-jcline@r...
Link: https://gitlab.freedesktop.org/drm/nouveau/-/merge_requests/14
Signed-off-by: Chen Jun <chenjun102@huawei.com>
Reviewed-by: weiyang wang <wangweiyang2@huawei.com>
Reviewed-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: Chen Jun <chenjun102@huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
Signed-off-by: Yu Changchun <yuchangchun1@huawei.com>
---
 drivers/gpu/drm/nouveau/nouveau_drm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c
index 42fc5c813a9b..470b3c8f7392 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
@@ -792,7 +792,7 @@ nouveau_drm_device_remove(struct drm_device *dev)
 	struct nvkm_client *client;
 	struct nvkm_device *device;
 
-	drm_dev_unregister(dev);
+	drm_dev_unplug(dev);
 
 	dev->irq_enabled = false;
 	client = nvxx_client(&drm->client.base);
--
2.25.1

From: Jeremy Cline <jcline@redhat.com>

mainline inclusion
from mainline
commit abae9164a421bc4a41a3769f01ebcd1f9d955e0e
issue: #I4NRS5
CVE: CVE-2020-27820
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...

--------------------------------

Rather than protecting the nouveau_drm clients list with the lock within
the "client" nouveau_cli, add a dedicated lock to serialize access to the
list. This is both clearer and necessary to avoid lockdep being upset with
us when we need to iterate through all the clients in the list and
potentially lock their mutex, which is the same class as the lock
protecting the entire list.

Cc: stable@vger.kernel.org # 5.4+
Signed-off-by: Jeremy Cline <jcline@redhat.com>
Reviewed-by: Lyude Paul <lyude@redhat.com>
Reviewed-by: Ben Skeggs <bskeggs@redhat.com>
Tested-by: Karol Herbst <kherbst@redhat.com>
Signed-off-by: Karol Herbst <kherbst@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20201125202648.5220-3-jcline@r...
Link: https://gitlab.freedesktop.org/drm/nouveau/-/merge_requests/14
Signed-off-by: Chen Jun <chenjun102@huawei.com>
Reviewed-by: weiyang wang <wangweiyang2@huawei.com>
Reviewed-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: Chen Jun <chenjun102@huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
Signed-off-by: Yu Changchun <yuchangchun1@huawei.com>
---
 drivers/gpu/drm/nouveau/nouveau_drm.c | 10 ++++++----
 drivers/gpu/drm/nouveau/nouveau_drv.h |  5 +++++
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c
index 470b3c8f7392..8201e9d11df9 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
@@ -557,6 +557,7 @@ nouveau_drm_device_init(struct drm_device *dev)
 	nvkm_dbgopt(nouveau_debug, "DRM");
 
 	INIT_LIST_HEAD(&drm->clients);
+	mutex_init(&drm->clients_lock);
 	spin_lock_init(&drm->tile.lock);
 
 	/* workaround an odd issue on nvc1 by disabling the device's
@@ -654,6 +655,7 @@ nouveau_drm_device_fini(struct drm_device *dev)
 	nouveau_cli_fini(&drm->client);
 	nouveau_cli_fini(&drm->master);
 	nvif_parent_dtor(&drm->parent);
+	mutex_destroy(&drm->clients_lock);
 	kfree(drm);
 }
 
@@ -1086,9 +1088,9 @@ nouveau_drm_open(struct drm_device *dev, struct drm_file *fpriv)
 
 	fpriv->driver_priv = cli;
 
-	mutex_lock(&drm->client.mutex);
+	mutex_lock(&drm->clients_lock);
 	list_add(&cli->head, &drm->clients);
-	mutex_unlock(&drm->client.mutex);
+	mutex_unlock(&drm->clients_lock);
 
 done:
 	if (ret && cli) {
@@ -1114,9 +1116,9 @@ nouveau_drm_postclose(struct drm_device *dev, struct drm_file *fpriv)
 	nouveau_abi16_fini(cli->abi16);
 	mutex_unlock(&cli->mutex);
 
-	mutex_lock(&drm->client.mutex);
+	mutex_lock(&drm->clients_lock);
 	list_del(&cli->head);
-	mutex_unlock(&drm->client.mutex);
+	mutex_unlock(&drm->clients_lock);
 
 	nouveau_cli_fini(cli);
 	kfree(cli);
diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h
index b8025507a9e4..8b252dca0fc3 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
@@ -142,6 +142,11 @@ struct nouveau_drm {
 
 	struct list_head clients;
 
+	/**
+	 * @clients_lock: Protects access to the @clients list of &struct nouveau_cli.
+	 */
+	struct mutex clients_lock;
+
 	u8 old_pm_cap;
 
 	struct {
--
2.25.1

From: Jeremy Cline <jcline@redhat.com>

mainline inclusion
from mainline
commit f55aaf63bde0d0336c3823bb3713bd4a464abbcf
issue: #I4NRS5
CVE: CVE-2020-27820
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...

--------------------------------

The postclose handler can run after the device has been removed (or the
driver has been unbound) since userspace clients are free to hold the file
open as long as they want. Because the device removal callback frees the
entire nouveau_drm structure, any reference to it in the postclose handler
will result in a use-after-free.

To reproduce this, one must simply open the device file, unbind the driver
(or physically remove the device), and then close the device file. This was
found and can be reproduced easily with the IGT core_hotunplug tests.

To avoid this, all clients are cleaned up in the device finalization rather
than deferring it to the postclose handler, and the postclose handler is
protected by a critical section which ensures the drm_dev_unplug() and the
postclose handler won't race.

This is not an ideal fix, since as I understand the proposed plan for the
kernel<->userspace interface for hotplug support, destroying the client
before the file is closed will cause problems. However, I believe that to
properly fix this issue, the lifetime of the nouveau_drm structure needs to
be extended to match the drm_device, and this proved to be a rather
invasive change. Thus, I've broken this out so the fix can be easily
backported.

Together with the two previous commits, this fixes CVE-2020-27820 (Karol).

Cc: stable@vger.kernel.org # 5.4+
Signed-off-by: Jeremy Cline <jcline@redhat.com>
Reviewed-by: Lyude Paul <lyude@redhat.com>
Reviewed-by: Ben Skeggs <bskeggs@redhat.com>
Tested-by: Karol Herbst <kherbst@redhat.com>
Signed-off-by: Karol Herbst <kherbst@redhat.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20201125202648.5220-4-jcline@r...
Link: https://gitlab.freedesktop.org/drm/nouveau/-/merge_requests/14
Signed-off-by: Chen Jun <chenjun102@huawei.com>
Reviewed-by: weiyang wang <wangweiyang2@huawei.com>
Reviewed-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: Chen Jun <chenjun102@huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
Signed-off-by: Yu Changchun <yuchangchun1@huawei.com>
---
 drivers/gpu/drm/nouveau/nouveau_drm.c | 30 +++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c
index 8201e9d11df9..ac96b6ab44c0 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
@@ -628,6 +628,7 @@ nouveau_drm_device_init(struct drm_device *dev)
 static void
 nouveau_drm_device_fini(struct drm_device *dev)
 {
+	struct nouveau_cli *cli, *temp_cli;
 	struct nouveau_drm *drm = nouveau_drm(dev);
 
 	if (nouveau_pmops_runtime()) {
@@ -652,6 +653,24 @@ nouveau_drm_device_fini(struct drm_device *dev)
 	nouveau_ttm_fini(drm);
 	nouveau_vga_fini(drm);
 
+	/*
+	 * There may be existing clients from as-yet unclosed files. For now,
+	 * clean them up here rather than deferring until the file is closed,
+	 * but this likely not correct if we want to support hot-unplugging
+	 * properly.
+	 */
+	mutex_lock(&drm->clients_lock);
+	list_for_each_entry_safe(cli, temp_cli, &drm->clients, head) {
+		list_del(&cli->head);
+		mutex_lock(&cli->mutex);
+		if (cli->abi16)
+			nouveau_abi16_fini(cli->abi16);
+		mutex_unlock(&cli->mutex);
+		nouveau_cli_fini(cli);
+		kfree(cli);
+	}
+	mutex_unlock(&drm->clients_lock);
+
 	nouveau_cli_fini(&drm->client);
 	nouveau_cli_fini(&drm->master);
 	nvif_parent_dtor(&drm->parent);
@@ -1108,6 +1127,16 @@ nouveau_drm_postclose(struct drm_device *dev, struct drm_file *fpriv)
 {
 	struct nouveau_cli *cli = nouveau_cli(fpriv);
 	struct nouveau_drm *drm = nouveau_drm(dev);
+	int dev_index;
+
+	/*
+	 * The device is gone, and as it currently stands all clients are
+	 * cleaned up in the removal codepath. In the future this may change
+	 * so that we can support hot-unplugging, but for now we immediately
+	 * return to avoid a double-free situation.
+	 */
+	if (!drm_dev_enter(dev, &dev_index))
+		return;
 
 	pm_runtime_get_sync(dev->dev);
 
@@ -1124,6 +1153,7 @@ nouveau_drm_postclose(struct drm_device *dev, struct drm_file *fpriv)
 	kfree(cli);
 	pm_runtime_mark_last_busy(dev->dev);
 	pm_runtime_put_autosuspend(dev->dev);
+	drm_dev_exit(dev_index);
 }
 
 static const struct drm_ioctl_desc
--
2.25.1
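
The primitive doing the heavy lifting here is the DRM critical section:
drm_dev_enter() returns false once drm_dev_unplug() has run, and unplug
waits for all open sections to call drm_dev_exit() before unregistering. A
minimal sketch of the guarded-callback shape (hypothetical handler name;
drm_dev_enter()/drm_dev_exit() are the real DRM APIs):

	#include <drm/drm_drv.h>

	static void my_postclose(struct drm_device *dev)
	{
		int idx;

		if (!drm_dev_enter(dev, &idx))
			return;	/* device gone: removal path cleaned up */

		/* ... per-client teardown that touches device state ... */

		drm_dev_exit(idx);
	}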

From: Zekun Shen <bruceshenzk@gmail.com>

mainline inclusion
from mainline
commit b922f622592af76b57cbc566eaeccda0b31a3496
issue: #I4NRS5
CVE: CVE-2021-43975
Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i...

--------------------------------

This bug report showed up when running our research tools. The report is
for a slab-out-of-bounds (SOOB) read, but it seems an SOOB write is also
possible a few lines below.

In detail, fw.len and sw.len are inputs coming from I/O. A len larger than
the size of self->rpc triggers the SOOB. The patch fixes the bugs by adding
sanity checks.

The bugs are triggerable with compromised/malfunctioning devices. They are
potentially exploitable given that they first leak up to 0xffff bytes and
are then able to overwrite the region later.

The patch is tested with the QEMU emulator. This is NOT tested with a real
device.

Attached is the log we found by fuzzing.

    BUG: KASAN: slab-out-of-bounds in hw_atl_utils_fw_upload_dwords+0x393/0x3c0 [atlantic]
    Read of size 4 at addr ffff888016260b08 by task modprobe/213
    CPU: 0 PID: 213 Comm: modprobe Not tainted 5.6.0 #1
    Call Trace:
     dump_stack+0x76/0xa0
     print_address_description.constprop.0+0x16/0x200
     ? hw_atl_utils_fw_upload_dwords+0x393/0x3c0 [atlantic]
     ? hw_atl_utils_fw_upload_dwords+0x393/0x3c0 [atlantic]
     __kasan_report.cold+0x37/0x7c
     ? aq_hw_read_reg_bit+0x60/0x70 [atlantic]
     ? hw_atl_utils_fw_upload_dwords+0x393/0x3c0 [atlantic]
     kasan_report+0xe/0x20
     hw_atl_utils_fw_upload_dwords+0x393/0x3c0 [atlantic]
     hw_atl_utils_fw_rpc_call+0x95/0x130 [atlantic]
     hw_atl_utils_fw_rpc_wait+0x176/0x210 [atlantic]
     hw_atl_utils_mpi_create+0x229/0x2e0 [atlantic]
     ? hw_atl_utils_fw_rpc_wait+0x210/0x210 [atlantic]
     ? hw_atl_utils_initfw+0x9f/0x1c8 [atlantic]
     hw_atl_utils_initfw+0x12a/0x1c8 [atlantic]
     aq_nic_ndev_register+0x88/0x650 [atlantic]
     ? aq_nic_ndev_init+0x235/0x3c0 [atlantic]
     aq_pci_probe+0x731/0x9b0 [atlantic]
     ? aq_pci_func_init+0xc0/0xc0 [atlantic]
     local_pci_probe+0xd3/0x160
     pci_device_probe+0x23f/0x3e0

Reported-by: Brendan Dolan-Gavitt <brendandg@nyu.edu>
Signed-off-by: Zekun Shen <bruceshenzk@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Chen Jun <chenjun102@huawei.com>
Reviewed-by: Xiu Jianfeng <xiujianfeng@huawei.com>
Reviewed-by: Wei Yongjun <weiyongjun1@huawei.com>
Signed-off-by: Chen Jun <chenjun102@huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
Signed-off-by: Yu Changchun <yuchangchun1@huawei.com>
---
 .../ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c
index 404cbf60d3f2..da1d185f6d22 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c
@@ -559,6 +559,11 @@ int hw_atl_utils_fw_rpc_wait(struct aq_hw_s *self,
 			goto err_exit;
 
 		if (fw.len == 0xFFFFU) {
+			if (sw.len > sizeof(self->rpc)) {
+				printk(KERN_INFO "Invalid sw len: %x\n", sw.len);
+				err = -EINVAL;
+				goto err_exit;
+			}
 			err = hw_atl_utils_fw_rpc_call(self, sw.len);
 			if (err < 0)
 				goto err_exit;
@@ -567,6 +572,11 @@ int hw_atl_utils_fw_rpc_wait(struct aq_hw_s *self,
 
 	if (rpc) {
 		if (fw.len) {
+			if (fw.len > sizeof(self->rpc)) {
+				printk(KERN_INFO "Invalid fw len: %x\n", fw.len);
+				err = -EINVAL;
+				goto err_exit;
+			}
 			err =
 			hw_atl_utils_fw_downld_dwords(self,
 						      self->rpc_addr,
--
2.25.1
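
The general rule behind both checks: a length field read back from device or
firmware memory is untrusted input and must be bounded by the destination
buffer before any copy is sized from it. A minimal sketch (hypothetical
structure, not the atlantic driver's actual layout):

	#include <linux/errno.h>
	#include <linux/string.h>
	#include <linux/types.h>

	struct fw_mailbox {
		u8 rpc[2048];	/* fixed-size staging buffer */
	};

	static int mailbox_copy_in(struct fw_mailbox *mb,
				   const void *src, u32 dev_len)
	{
		/* dev_len comes from the device: validate before use. */
		if (dev_len > sizeof(mb->rpc))
			return -EINVAL;

		memcpy(mb->rpc, src, dev_len);
		return 0;
	}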

From: Zekun Shen <bruceshenzk@gmail.com>

maillist inclusion
category: bugfix
issue: #I4NRS5
CVE: CVE-2021-43976
Reference: https://patchwork.kernel.org/project/linux-wireless/patch/YX4CqjfRcTa6bVL+@Z...

--------------------------------

Currently, with an unknown recv_type, mwifiex_usb_recv just returns -1
without restoring the skb. The next time mwifiex_usb_rx_complete is invoked
with the same skb, calling skb_put causes skb_over_panic.

The bug is triggerable with a compromised/malfunctioning USB device. After
applying the patch, skb_over_panic no longer shows up with the same input.

Attached is the panic report from fuzzing:

    skbuff: skb_over_panic: text:000000003bf1b5fa len:2048
    put:4 head:00000000dd6a115b data:000000000a9445d8
    tail:0x844 end:0x840 dev:<NULL>
    kernel BUG at net/core/skbuff.c:109!
    invalid opcode: 0000 [#1] SMP KASAN NOPTI
    CPU: 0 PID: 198 Comm: in:imklog Not tainted 5.6.0 #60
    RIP: 0010:skb_panic+0x15f/0x161
    Call Trace:
     <IRQ>
     ? mwifiex_usb_rx_complete+0x26b/0xfcd [mwifiex_usb]
     skb_put.cold+0x24/0x24
     mwifiex_usb_rx_complete+0x26b/0xfcd [mwifiex_usb]
     __usb_hcd_giveback_urb+0x1e4/0x380
     usb_giveback_urb_bh+0x241/0x4f0
     ? __hrtimer_run_queues+0x316/0x740
     ? __usb_hcd_giveback_urb+0x380/0x380
     tasklet_action_common.isra.0+0x135/0x330
     __do_softirq+0x18c/0x634
     irq_exit+0x114/0x140
     smp_apic_timer_interrupt+0xde/0x380
     apic_timer_interrupt+0xf/0x20
     </IRQ>

Reported-by: Brendan Dolan-Gavitt <brendandg@nyu.edu>
Signed-off-by: Zekun Shen <bruceshenzk@gmail.com>
Signed-off-by: Chen Jun <chenjun102@huawei.com>
Reviewed-by: Xiu Jianfeng <xiujianfeng@huawei.com>
Reviewed-by: Wei Yongjun <weiyongjun1@huawei.com>
Signed-off-by: Chen Jun <chenjun102@huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
Signed-off-by: Yu Changchun <yuchangchun1@huawei.com>
---
 drivers/net/wireless/marvell/mwifiex/usb.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/marvell/mwifiex/usb.c b/drivers/net/wireless/marvell/mwifiex/usb.c
index 426e39d4ccf0..6d81e87861ca 100644
--- a/drivers/net/wireless/marvell/mwifiex/usb.c
+++ b/drivers/net/wireless/marvell/mwifiex/usb.c
@@ -130,7 +130,8 @@ static int mwifiex_usb_recv(struct mwifiex_adapter *adapter,
 		default:
 			mwifiex_dbg(adapter, ERROR,
 				    "unknown recv_type %#x\n", recv_type);
-			return -1;
+			ret = -1;
+			goto exit_restore_skb;
 		}
 		break;
 	case MWIFIEX_USB_EP_DATA:
--
2.25.1

From: John Harrison <John.C.Harrison@Intel.com>

mainline inclusion
from mainline-v5.11-rc1
commit c784e5249e773689e38d2bc1749f08b986621a26
issue: #I4NRS5
CVE: CVE-2020-12363, CVE-2020-12364

-----------------------------------------------------------------

The latest GuC firmware includes a number of interface changes that require
driver updates to match.

 * Starting from Gen11, the ID to be provided to GuC needs to contain the
   engine class in bits [0..2] and the instance in bits [3..6].

   NOTE: this patch breaks pointer dereferences in some existing GuC
   functions that use the guc_id to dereference arrays, but these functions
   are not used for now as we have GuC submission disabled; we will update
   these functions in a follow-up patch which requires the new IDs.

 * The new GuC requires the additional data structure (ADS) and associated
   'private_data' pointer to be set up. This is basically a scratch area of
   memory that the GuC owns. The size is read from the CSS header.

 * There is now a physical to logical engine mapping table in the ADS which
   needs to be configured in order for the firmware to load. For now, the
   table is initialised with a 1 to 1 mapping.

 * GUC_CTL_CTXINFO has been removed from the initialization params.

 * reg_state_buffer is maintained internally by the GuC as part of the
   private data.

 * The ADS layout has changed significantly. This patch updates the shared
   structure and also adds better documentation of the layout.

 * While i915 does not use GuC doorbells, the firmware now requires that
   some initialisation is done.

 * The number of engine classes and instances supported in the ADS has been
   increased.

Signed-off-by: John Harrison <John.C.Harrison@Intel.com>
Signed-off-by: Matthew Brost <matthew.brost@intel.com>
Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Signed-off-by: Oscar Mateo <oscar.mateo@intel.com>
Signed-off-by: Michel Thierry <michel.thierry@intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Signed-off-by: Michal Wajdeczko <michal.wajdeczko@intel.com>
Cc: Michal Winiarski <michal.winiarski@intel.com>
Cc: Tomasz Lis <tomasz.lis@intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Reviewed-by: Daniele Ceraolo Spurio <daniele.ceraolospurio@intel.com>
Signed-off-by: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20201028145826.2949180-2-John....
Signed-off-by: Yu Changchun <yuchangchun1@huawei.com>
---
 drivers/gpu/drm/i915/gt/intel_engine_cs.c    |   3 +-
 drivers/gpu/drm/i915/gt/uc/intel_guc.c       |  18 ---
 drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c   | 131 +++++++++++++----
 drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h  |  80 +++++------
 drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h   |   5 +
 drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c     |  27 ++--
 drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h     |   2 +
 drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h |   6 +-
 8 files changed, 176 insertions(+), 96 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index a19537706ed1..c940ac3aae2f 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -305,8 +305,9 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
 	engine->i915 = i915;
 	engine->gt = gt;
 	engine->uncore = gt->uncore;
-	engine->hw_id = engine->guc_id = info->hw_id;
 	engine->mmio_base = __engine_mmio_base(i915, info->mmio_bases);
+	engine->hw_id = info->hw_id;
+	engine->guc_id = MAKE_GUC_ID(info->class, info->instance);
 
 	engine->class = info->class;
 	engine->instance = info->instance;
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc.c b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
index 942c7c187adb..6909da1e1a73 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc.c
@@ -213,23 +213,6 @@ static u32 guc_ctl_feature_flags(struct intel_guc *guc)
 	return flags;
 }
 
-static u32 guc_ctl_ctxinfo_flags(struct intel_guc *guc)
-{
-	u32 flags = 0;
-
-	if (intel_guc_submission_is_used(guc)) {
-		u32 ctxnum, base;
-
-		base = intel_guc_ggtt_offset(guc, guc->stage_desc_pool);
-		ctxnum = GUC_MAX_STAGE_DESCRIPTORS / 16;
-
-		base >>= PAGE_SHIFT;
-		flags |= (base << GUC_CTL_BASE_ADDR_SHIFT) |
-			 (ctxnum << GUC_CTL_CTXNUM_IN16_SHIFT);
-	}
-	return flags;
-}
-
 static u32 guc_ctl_log_params_flags(struct intel_guc *guc)
 {
 	u32 offset = intel_guc_ggtt_offset(guc, guc->log.vma) >> PAGE_SHIFT;
@@ -291,7 +274,6 @@ static void guc_init_params(struct intel_guc *guc)
 
 	BUILD_BUG_ON(sizeof(guc->params) != GUC_CTL_MAX_DWORDS * sizeof(u32));
 
-	params[GUC_CTL_CTXINFO] = guc_ctl_ctxinfo_flags(guc);
 	params[GUC_CTL_LOG_PARAMS] = guc_ctl_log_params_flags(guc);
 	params[GUC_CTL_FEATURE] = guc_ctl_feature_flags(guc);
 	params[GUC_CTL_DEBUG] = guc_ctl_debug_flags(guc);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
index d44061033f23..7950d28beb8c 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
@@ -10,11 +10,52 @@
 
 /*
  * The Additional Data Struct (ADS) has pointers for different buffers used by
- * the GuC. One single gem object contains the ADS struct itself (guc_ads), the
- * scheduling policies (guc_policies), a structure describing a collection of
- * register sets (guc_mmio_reg_state) and some extra pages for the GuC to save
- * its internal state for sleep.
+ * the GuC. One single gem object contains the ADS struct itself (guc_ads) and
+ * all the extra buffers indirectly linked via the ADS struct's entries.
+ *
+ * Layout of the ADS blob allocated for the GuC:
+ *
+ *      +---------------------------------------+ <== base
+ *      | guc_ads                               |
+ *      +---------------------------------------+
+ *      | guc_policies                          |
+ *      +---------------------------------------+
+ *      | guc_gt_system_info                    |
+ *      +---------------------------------------+
+ *      | guc_clients_info                      |
+ *      +---------------------------------------+
+ *      | guc_ct_pool_entry[size]               |
+ *      +---------------------------------------+
+ *      | padding                               |
+ *      +---------------------------------------+ <== 4K aligned
+ *      | private data                          |
+ *      +---------------------------------------+
+ *      | padding                               |
+ *      +---------------------------------------+ <== 4K aligned
  */
+struct __guc_ads_blob {
+	struct guc_ads ads;
+	struct guc_policies policies;
+	struct guc_gt_system_info system_info;
+	struct guc_clients_info clients_info;
+	struct guc_ct_pool_entry ct_pool[GUC_CT_POOL_SIZE];
+} __packed;
+
+static u32 guc_ads_private_data_size(struct intel_guc *guc)
+{
+	return PAGE_ALIGN(guc->fw.private_data_size);
+}
+
+static u32 guc_ads_private_data_offset(struct intel_guc *guc)
+{
+	return PAGE_ALIGN(sizeof(struct __guc_ads_blob));
+}
+
+static u32 guc_ads_blob_size(struct intel_guc *guc)
+{
+	return guc_ads_private_data_offset(guc) +
+	       guc_ads_private_data_size(guc);
+}
 
 static void guc_policy_init(struct guc_policy *policy)
 {
@@ -48,26 +89,37 @@ static void guc_ct_pool_entries_init(struct guc_ct_pool_entry *pool, u32 num)
 	memset(pool, 0, num * sizeof(*pool));
 }
 
+static void guc_mapping_table_init(struct intel_gt *gt,
+				   struct guc_gt_system_info *system_info)
+{
+	unsigned int i, j;
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
+	/* Table must be set to invalid values for entries not used */
+	for (i = 0; i < GUC_MAX_ENGINE_CLASSES; ++i)
+		for (j = 0; j < GUC_MAX_INSTANCES_PER_CLASS; ++j)
+			system_info->mapping_table[i][j] =
+				GUC_MAX_INSTANCES_PER_CLASS;
+
+	for_each_engine(engine, gt, id) {
+		u8 guc_class = engine->class;
+
+		system_info->mapping_table[guc_class][engine->instance] =
+			engine->instance;
+	}
+}
+
 /*
  * The first 80 dwords of the register state context, containing the
  * execlists and ppgtt registers.
 */
 #define LR_HW_CONTEXT_SIZE	(80 * sizeof(u32))
 
-/* The ads obj includes the struct itself and buffers passed to GuC */
-struct __guc_ads_blob {
-	struct guc_ads ads;
-	struct guc_policies policies;
-	struct guc_mmio_reg_state reg_state;
-	struct guc_gt_system_info system_info;
-	struct guc_clients_info clients_info;
-	struct guc_ct_pool_entry ct_pool[GUC_CT_POOL_SIZE];
-	u8 reg_state_buffer[GUC_S3_SAVE_SPACE_PAGES * PAGE_SIZE];
-} __packed;
-
 static void __guc_ads_init(struct intel_guc *guc)
 {
 	struct intel_gt *gt = guc_to_gt(guc);
+	struct drm_i915_private *i915 = gt->i915;
 	struct __guc_ads_blob *blob = guc->ads_blob;
 	const u32 skipped_size = LRC_PPHWSP_SZ * PAGE_SIZE + LR_HW_CONTEXT_SIZE;
 	u32 base;
@@ -99,13 +151,25 @@ static void __guc_ads_init(struct intel_guc *guc)
 	}
 
 	/* System info */
-	blob->system_info.slice_enabled = hweight8(gt->info.sseu.slice_mask);
-	blob->system_info.rcs_enabled = 1;
-	blob->system_info.bcs_enabled = 1;
+	blob->system_info.engine_enabled_masks[RENDER_CLASS] = 1;
+	blob->system_info.engine_enabled_masks[COPY_ENGINE_CLASS] = 1;
+	blob->system_info.engine_enabled_masks[VIDEO_DECODE_CLASS] = VDBOX_MASK(gt);
+	blob->system_info.engine_enabled_masks[VIDEO_ENHANCEMENT_CLASS] = VEBOX_MASK(gt);
+
+	blob->system_info.generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_SLICE_ENABLED] =
+		hweight8(gt->info.sseu.slice_mask);
+	blob->system_info.generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_VDBOX_SFC_SUPPORT_MASK] =
+		gt->info.vdbox_sfc_access;
+
+	if (INTEL_GEN(i915) >= 12 && !IS_DGFX(i915)) {
+		u32 distdbreg = intel_uncore_read(gt->uncore,
+						  GEN12_DIST_DBS_POPULATED);
+		blob->system_info.generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_DOORBELL_COUNT_PER_SQIDI] =
+			((distdbreg >> GEN12_DOORBELLS_PER_SQIDI_SHIFT) &
+			 GEN12_DOORBELLS_PER_SQIDI) + 1;
+	}
 
-	blob->system_info.vdbox_enable_mask = VDBOX_MASK(gt);
-	blob->system_info.vebox_enable_mask = VEBOX_MASK(gt);
-	blob->system_info.vdbox_sfc_support_mask = gt->info.vdbox_sfc_access;
+	guc_mapping_table_init(guc_to_gt(guc), &blob->system_info);
 
 	base = intel_guc_ggtt_offset(guc, guc->ads_vma);
 
@@ -118,11 +182,12 @@ static void __guc_ads_init(struct intel_guc *guc)
 
 	/* ADS */
 	blob->ads.scheduler_policies = base + ptr_offset(blob, policies);
-	blob->ads.reg_state_buffer = base + ptr_offset(blob, reg_state_buffer);
-	blob->ads.reg_state_addr = base + ptr_offset(blob, reg_state);
 	blob->ads.gt_system_info = base + ptr_offset(blob, system_info);
 	blob->ads.clients_info = base + ptr_offset(blob, clients_info);
 
+	/* Private Data */
+	blob->ads.private_data = base + guc_ads_private_data_offset(guc);
+
 	i915_gem_object_flush_map(guc->ads_vma->obj);
 }
 
@@ -135,14 +200,15 @@ static void __guc_ads_init(struct intel_guc *guc)
  */
 int intel_guc_ads_create(struct intel_guc *guc)
 {
-	const u32 size = PAGE_ALIGN(sizeof(struct __guc_ads_blob));
+	u32 size;
 	int ret;
 
 	GEM_BUG_ON(guc->ads_vma);
 
+	size = guc_ads_blob_size(guc);
+
 	ret = intel_guc_allocate_and_map_vma(guc, size, &guc->ads_vma,
 					     (void **)&guc->ads_blob);
-
 	if (ret)
 		return ret;
 
@@ -156,6 +222,18 @@ void intel_guc_ads_destroy(struct intel_guc *guc)
 	i915_vma_unpin_and_release(&guc->ads_vma, I915_VMA_RELEASE_MAP);
 }
 
+static void guc_ads_private_data_reset(struct intel_guc *guc)
+{
+	u32 size;
+
+	size = guc_ads_private_data_size(guc);
+	if (!size)
+		return;
+
+	memset((void *)guc->ads_blob + guc_ads_private_data_offset(guc), 0,
+	       size);
+}
+
 /**
  * intel_guc_ads_reset() - prepares GuC Additional Data Struct for reuse
  * @guc: intel_guc struct
@@ -168,5 +246,8 @@ void intel_guc_ads_reset(struct intel_guc *guc)
 {
 	if (!guc->ads_vma)
 		return;
+
 	__guc_ads_init(guc);
+
+	guc_ads_private_data_reset(guc);
 }
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
index a6b733c146c9..79c560d9c0b6 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_fwif.h
@@ -26,8 +26,8 @@
 #define GUC_VIDEO_ENGINE2		4
 #define GUC_MAX_ENGINES_NUM		(GUC_VIDEO_ENGINE2 + 1)
 
-#define GUC_MAX_ENGINE_CLASSES		5
-#define GUC_MAX_INSTANCES_PER_CLASS	16
+#define GUC_MAX_ENGINE_CLASSES		16
+#define GUC_MAX_INSTANCES_PER_CLASS	32
 
 #define GUC_DOORBELL_INVALID		256
 
@@ -62,12 +62,7 @@
 #define GUC_STAGE_DESC_ATTR_PCH		BIT(6)
 #define GUC_STAGE_DESC_ATTR_TERMINATED	BIT(7)
 
-/* New GuC control data */
-#define GUC_CTL_CTXINFO			0
-#define GUC_CTL_CTXNUM_IN16_SHIFT	0
-#define GUC_CTL_BASE_ADDR_SHIFT		12
-
-#define GUC_CTL_LOG_PARAMS		1
+#define GUC_CTL_LOG_PARAMS		0
 #define GUC_LOG_VALID			(1 << 0)
 #define GUC_LOG_NOTIFY_ON_HALF_FULL	(1 << 1)
 #define GUC_LOG_ALLOC_IN_MEGABYTE	(1 << 3)
@@ -79,11 +74,11 @@
 #define GUC_LOG_ISR_MASK		(0x7 << GUC_LOG_ISR_SHIFT)
 #define GUC_LOG_BUF_ADDR_SHIFT		12
 
-#define GUC_CTL_WA			2
-#define GUC_CTL_FEATURE			3
+#define GUC_CTL_WA			1
+#define GUC_CTL_FEATURE			2
 #define GUC_CTL_DISABLE_SCHEDULER	(1 << 14)
 
-#define GUC_CTL_DEBUG			4
+#define GUC_CTL_DEBUG			3
 #define GUC_LOG_VERBOSITY_SHIFT		0
 #define GUC_LOG_VERBOSITY_LOW		(0 << GUC_LOG_VERBOSITY_SHIFT)
 #define GUC_LOG_VERBOSITY_MED		(1 << GUC_LOG_VERBOSITY_SHIFT)
@@ -97,12 +92,37 @@
 #define GUC_LOG_DISABLED		(1 << 6)
 #define GUC_PROFILE_ENABLED		(1 << 7)
 
-#define GUC_CTL_ADS			5
+#define GUC_CTL_ADS			4
 #define GUC_ADS_ADDR_SHIFT		1
 #define GUC_ADS_ADDR_MASK		(0xFFFFF << GUC_ADS_ADDR_SHIFT)
 
 #define GUC_CTL_MAX_DWORDS		(SOFT_SCRATCH_COUNT - 2) /* [1..14] */
 
+/* Generic GT SysInfo data types */
+#define GUC_GENERIC_GT_SYSINFO_SLICE_ENABLED		0
+#define GUC_GENERIC_GT_SYSINFO_VDBOX_SFC_SUPPORT_MASK	1
+#define GUC_GENERIC_GT_SYSINFO_DOORBELL_COUNT_PER_SQIDI	2
+#define GUC_GENERIC_GT_SYSINFO_MAX			16
+
+/*
+ * The class goes in bits [0..2] of the GuC ID, the instance in bits [3..6].
+ * Bit 7 can be used for operations that apply to all engine classes&instances.
+ */
+#define GUC_ENGINE_CLASS_SHIFT		0
+#define GUC_ENGINE_CLASS_MASK		(0x7 << GUC_ENGINE_CLASS_SHIFT)
+#define GUC_ENGINE_INSTANCE_SHIFT	3
+#define GUC_ENGINE_INSTANCE_MASK	(0xf << GUC_ENGINE_INSTANCE_SHIFT)
+#define GUC_ENGINE_ALL_INSTANCES	BIT(7)
+
+#define MAKE_GUC_ID(class, instance) \
+	(((class) << GUC_ENGINE_CLASS_SHIFT) | \
+	 ((instance) << GUC_ENGINE_INSTANCE_SHIFT))
+
+#define GUC_ID_TO_ENGINE_CLASS(guc_id) \
+	(((guc_id) & GUC_ENGINE_CLASS_MASK) >> GUC_ENGINE_CLASS_SHIFT)
+#define GUC_ID_TO_ENGINE_INSTANCE(guc_id) \
+	(((guc_id) & GUC_ENGINE_INSTANCE_MASK) >> GUC_ENGINE_INSTANCE_SHIFT)
+
 /* Work item for submitting workloads into work queue of GuC.
 */
 struct guc_wq_item {
 	u32 header;
@@ -336,11 +356,6 @@ struct guc_policies {
 } __packed;
 
 /* GuC MMIO reg state struct */
-
-
-#define GUC_REGSET_MAX_REGISTERS	64
-#define GUC_S3_SAVE_SPACE_PAGES	10
-
 struct guc_mmio_reg {
 	u32 offset;
 	u32 value;
@@ -348,28 +363,18 @@ struct guc_mmio_reg {
 #define GUC_REGSET_MASKED		(1 << 0)
 } __packed;
 
-struct guc_mmio_regset {
-	struct guc_mmio_reg registers[GUC_REGSET_MAX_REGISTERS];
-	u32 values_valid;
-	u32 number_of_registers;
-} __packed;
-
 /* GuC register sets */
-struct guc_mmio_reg_state {
-	struct guc_mmio_regset engine_reg[GUC_MAX_ENGINE_CLASSES][GUC_MAX_INSTANCES_PER_CLASS];
-	u32 reserved[98];
+struct guc_mmio_reg_set {
+	u32 address;
+	u16 count;
+	u16 reserved;
 } __packed;
 
 /* HW info */
 struct guc_gt_system_info {
-	u32 slice_enabled;
-	u32 rcs_enabled;
-	u32 reserved0;
-	u32 bcs_enabled;
-	u32 vdbox_enable_mask;
-	u32 vdbox_sfc_support_mask;
-	u32 vebox_enable_mask;
-	u32 reserved[9];
+	u8 mapping_table[GUC_MAX_ENGINE_CLASSES][GUC_MAX_INSTANCES_PER_CLASS];
+	u32 engine_enabled_masks[GUC_MAX_ENGINE_CLASSES];
+	u32 generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_MAX];
 } __packed;
 
 /* Clients info */
@@ -390,15 +395,16 @@ struct guc_clients_info {
 
 /* GuC Additional Data Struct */
 struct guc_ads {
-	u32 reg_state_addr;
-	u32 reg_state_buffer;
+	struct guc_mmio_reg_set reg_state_list[GUC_MAX_ENGINE_CLASSES][GUC_MAX_INSTANCES_PER_CLASS];
+	u32 reserved0;
 	u32 scheduler_policies;
 	u32 gt_system_info;
 	u32 clients_info;
 	u32 control_data;
 	u32 golden_context_lrca[GUC_MAX_ENGINE_CLASSES];
 	u32 eng_state_size[GUC_MAX_ENGINE_CLASSES];
-	u32 reserved[16];
+	u32 private_data;
+	u32 reserved[15];
 } __packed;
 
 /* GuC logging structures */
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h b/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h
index 1949346e714e..b37fc2ffaef2 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_reg.h
@@ -118,6 +118,11 @@ struct guc_doorbell_info {
 #define GEN8_DRB_VALID			(1<<0)
 #define GEN8_DRBREGU(x)			_MMIO(0x1000 + (x) * 8 + 4)
 
+#define GEN12_DIST_DBS_POPULATED	_MMIO(0xd08)
+#define GEN12_DOORBELLS_PER_SQIDI_SHIFT	16
+#define GEN12_DOORBELLS_PER_SQIDI	(0xff)
+#define GEN12_SQIDIS_DOORBELL_EXIST	(0xffff)
+
 #define DE_GUCRMR			_MMIO(0x44054)
 
 #define GUC_BCS_RCS_IER			_MMIO(0xC550)
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
index 80e8b6c3bc8c..ee4ac3922277 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c
@@ -44,23 +44,19 @@ void intel_uc_fw_change_status(struct intel_uc_fw *uc_fw,
  * List of required GuC and HuC binaries per-platform.
  * Must be ordered based on platform + revid, from newer to older.
  *
- * TGL 35.2 is interface-compatible with 33.0 for previous Gens. The deltas
- * between 33.0 and 35.2 are only related to new additions to support new Gen12
- * features.
- *
  * Note that RKL uses the same firmware as TGL.
 */
 #define INTEL_UC_FIRMWARE_DEFS(fw_def, guc_def, huc_def) \
-	fw_def(ROCKETLAKE,  0, guc_def(tgl, 35, 2, 0), huc_def(tgl,  7, 5, 0)) \
-	fw_def(TIGERLAKE,   0, guc_def(tgl, 35, 2, 0), huc_def(tgl,  7, 5, 0)) \
-	fw_def(ELKHARTLAKE, 0, guc_def(ehl, 33, 0, 4), huc_def(ehl,  9, 0, 0)) \
-	fw_def(ICELAKE,     0, guc_def(icl, 33, 0, 0), huc_def(icl,  9, 0, 0)) \
-	fw_def(COMETLAKE,   5, guc_def(cml, 33, 0, 0), huc_def(cml,  4, 0, 0)) \
-	fw_def(COFFEELAKE,  0, guc_def(kbl, 33, 0, 0), huc_def(kbl,  4, 0, 0)) \
-	fw_def(GEMINILAKE,  0, guc_def(glk, 33, 0, 0), huc_def(glk,  4, 0, 0)) \
-	fw_def(KABYLAKE,    0, guc_def(kbl, 33, 0, 0), huc_def(kbl,  4, 0, 0)) \
-	fw_def(BROXTON,     0, guc_def(bxt, 33, 0, 0), huc_def(bxt,  2, 0, 0)) \
-	fw_def(SKYLAKE,     0, guc_def(skl, 33, 0, 0), huc_def(skl,  2, 0, 0))
+	fw_def(ROCKETLAKE,  0, guc_def(tgl, 49, 0, 1), huc_def(tgl,  7, 5, 0)) \
+	fw_def(TIGERLAKE,   0, guc_def(tgl, 49, 0, 1), huc_def(tgl,  7, 5, 0)) \
+	fw_def(ELKHARTLAKE, 0, guc_def(ehl, 49, 0, 1), huc_def(ehl,  9, 0, 0)) \
+	fw_def(ICELAKE,     0, guc_def(icl, 49, 0, 1), huc_def(icl,  9, 0, 0)) \
+	fw_def(COMETLAKE,   5, guc_def(cml, 49, 0, 1), huc_def(cml,  4, 0, 0)) \
+	fw_def(COFFEELAKE,  0, guc_def(kbl, 49, 0, 1), huc_def(kbl,  4, 0, 0)) \
+	fw_def(GEMINILAKE,  0, guc_def(glk, 49, 0, 1), huc_def(glk,  4, 0, 0)) \
+	fw_def(KABYLAKE,    0, guc_def(kbl, 49, 0, 1), huc_def(kbl,  4, 0, 0)) \
+	fw_def(BROXTON,     0, guc_def(bxt, 49, 0, 1), huc_def(bxt,  2, 0, 0)) \
+	fw_def(SKYLAKE,     0, guc_def(skl, 49, 0, 1), huc_def(skl,  2, 0, 0))
 
 #define __MAKE_UC_FW_PATH(prefix_, name_, major_, minor_, patch_) \
 	"i915/" \
@@ -371,6 +367,9 @@ int intel_uc_fw_fetch(struct intel_uc_fw *uc_fw)
 		}
 	}
 
+	if (uc_fw->type == INTEL_UC_FW_TYPE_GUC)
+		uc_fw->private_data_size = css->private_data_size;
+
 	obj = i915_gem_object_create_shmem_from_data(i915, fw->data, fw->size);
 	if (IS_ERR(obj)) {
 		err = PTR_ERR(obj);
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
index 23d3a423ac0f..99bb1fe1af66 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.h
@@ -88,6 +88,8 @@ struct intel_uc_fw {
 
 	u32 rsa_size;
 	u32 ucode_size;
+
+	u32 private_data_size;
 };
 
 #ifdef CONFIG_DRM_I915_DEBUG_GUC
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h
index 029214cdedd5..e41ffc7a7fbc 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h
+++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw_abi.h
@@ -69,7 +69,11 @@ struct uc_css_header {
 #define CSS_SW_VERSION_UC_MAJOR	(0xFF << 16)
 #define CSS_SW_VERSION_UC_MINOR	(0xFF << 8)
 #define CSS_SW_VERSION_UC_PATCH	(0xFF << 0)
-	u32 reserved[14];
+	u32 reserved0[13];
+	union {
+		u32 private_data_size; /* only applies to GuC */
+		u32 reserved1;
+	};
 	u32 header_info;
 } __packed;
 static_assert(sizeof(struct uc_css_header) == 128);
--
2.25.1
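
A worked example of the new ID packing may help when reading the diff.
Using the macros quoted in the patch (class in bits [0..2], instance in
bits [3..6]; the concrete class/instance numbers below are chosen only for
illustration):

	/* MAKE_GUC_ID(class, instance) =
	 *	(class << GUC_ENGINE_CLASS_SHIFT) |
	 *	(instance << GUC_ENGINE_INSTANCE_SHIFT)
	 *
	 * e.g. class 1, instance 2:
	 *	(1 << 0) | (2 << 3) = 0x11
	 *
	 * decoding 0x11 with the masks from the patch:
	 *	class    = 0x11 & 0x7        = 1
	 *	instance = (0x11 >> 3) & 0xf = 2
	 */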

From: Christoph Hellwig <hch@lst.de> mainline inclusion from mainline-v5.12-rc1 commit 01ea173e103edd5ec41acec65b9261b87e123fc2 issue: #I4NRS5 CVE: CVE-2021-4037 ----------------------------------------------------------------- XFS always inherits the SGID bit if it is set on the parent inode, while the generic inode_init_owner does not do this in a few cases where it can create a possible security problem, see commit 0fa3ecd87848 ("Fix up non-directory creation in SGID directories") for details. Switch XFS to use the generic helper for the normal path to fix this, just keeping the simple field inheritance open coded for the non-SGID case with the bsdgrpid mount option. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: Christian Brauner <christian.brauner@ubuntu.com> Signed-off-by: Christoph Hellwig <hch@lst.de> Reviewed-by: Darrick J. Wong <djwong@kernel.org> Signed-off-by: Darrick J. Wong <djwong@kernel.org> Conflicts: fs/xfs/xfs_inode.c Signed-off-by: Yu Changchun <yuchangchun1@huawei.com> --- fs/xfs/xfs_inode.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 2bfbcf28b1bd..511c9363e1a9 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -802,6 +802,7 @@ xfs_ialloc( xfs_buf_t **ialloc_context, xfs_inode_t **ipp) { + struct inode *dir = pip ? VFS_I(pip) : NULL; struct xfs_mount *mp = tp->t_mountp; xfs_ino_t ino; xfs_inode_t *ip; @@ -847,18 +848,17 @@ xfs_ialloc( return error; ASSERT(ip != NULL); inode = VFS_I(ip); - inode->i_mode = mode; set_nlink(inode, nlink); - inode->i_uid = current_fsuid(); inode->i_rdev = rdev; ip->i_d.di_projid = prid; - if (pip && XFS_INHERIT_GID(pip)) { - inode->i_gid = VFS_I(pip)->i_gid; - if ((VFS_I(pip)->i_mode & S_ISGID) && S_ISDIR(mode)) - inode->i_mode |= S_ISGID; + if (dir && !(dir->i_mode & S_ISGID) && + (mp->m_flags & XFS_MOUNT_GRPID)) { + inode->i_uid = current_fsuid(); + inode->i_gid = dir->i_gid; + inode->i_mode = mode; } else { - inode->i_gid = current_fsgid(); + inode_init_owner(inode, dir, mode); } /* -- 2.25.1
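For reference, the gid/mode policy the generic helper applies (the one XFS now reuses on the normal path) can be sketched standalone as below. This is an illustrative model of the rule from commit 0fa3ecd87848, not the kernel's inode_init_owner(); the caller facts that in_group_p() and capable_wrt_inode_uidgid(dir, CAP_FSETID) would supply are stubbed out as plain booleans.

#include <stdbool.h>
#include <stdio.h>
#include <sys/stat.h>   /* S_ISGID, S_IXGRP, S_ISDIR, mode_t */
#include <sys/types.h>  /* gid_t */

/* Stand-ins for kernel-side facts about the creating task. */
struct caller {
        gid_t fsgid;        /* current_fsgid() */
        bool in_dir_group;  /* in_group_p(dir->i_gid) */
        bool cap_fsetid;    /* capable_wrt_inode_uidgid(dir, CAP_FSETID) */
};

static void init_owner_sketch(const struct caller *c, mode_t dir_mode,
                              gid_t dir_gid, mode_t *mode, gid_t *gid)
{
        if (dir_mode & S_ISGID) {
                *gid = dir_gid;           /* inherit group from the dir */
                if (S_ISDIR(*mode))
                        *mode |= S_ISGID; /* directories inherit SGID */
                else if ((*mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP) &&
                         !c->in_dir_group && !c->cap_fsetid)
                        *mode &= ~S_ISGID; /* strip a would-be setgid
                                              executable in a group the
                                              creator does not belong to */
        } else {
                *gid = c->fsgid;
        }
}

int main(void)
{
        struct caller c = { .fsgid = 1000 }; /* not in dir group, no CAP_FSETID */
        mode_t mode = S_ISGID | S_IXGRP | 0644;
        gid_t gid;

        init_owner_sketch(&c, S_ISGID | 0775, 2000, &mode, &gid);
        printf("gid=%u, SGID %s\n", (unsigned int)gid,
               (mode & S_ISGID) ? "kept" : "stripped"); /* -> stripped */
        return 0;
}

The security-relevant branch is the else-if: the pre-patch XFS open-coded path unconditionally kept S_ISGID for such files, which is exactly what the switch to the generic helper fixes.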

From: Daniel Borkmann <daniel@iogearbox.net> mainline inclusion from mainline-v5.16-rc1 commit 353050be4c19e102178ccc05988101887c25ae53 category: bugfix issue: #I4NRS5 CVE: CVE-2021-4001 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?i... -------------------------------- Commit a23740ec43ba ("bpf: Track contents of read-only maps as scalars") is checking whether maps are read-only both from BPF program side and user space side, and then, given their content is constant, reading out their data via map->ops->map_direct_value_addr() which is then subsequently used as known scalar value for the register, that is, it is marked as __mark_reg_known() with the read value at verification time. Before a23740ec43ba, the register content was marked as an unknown scalar so the verifier could not make any assumptions about the map content. The current implementation however is prone to a TOCTOU race, meaning, the value read as known scalar for the register is not guaranteed to be exactly the same at a later point when the program is executed, and as such, the prior made assumptions of the verifier with regards to the program will be invalid which can cause issues such as OOB access, etc. While the BPF_F_RDONLY_PROG map flag is always fixed and required to be specified at map creation time, the map->frozen property is initially set to false for the map given the map value needs to be populated, e.g. for global data sections. Once complete, the loader "freezes" the map from user space such that no subsequent updates/deletes are possible anymore. For the rest of the lifetime of the map, this freeze one-time trigger cannot be undone anymore after a successful BPF_MAP_FREEZE cmd return. Meaning, any new BPF_* cmd calls which would update/delete map entries will be rejected with -EPERM since map_get_sys_perms() removes the FMODE_CAN_WRITE permission. This also means that pending update/delete map entries must still complete before this guarantee is given. This corner case is not an issue for loaders since they create and prepare such program private map in successive steps. However, a malicious user is able to trigger this TOCTOU race in two different ways: i) via userfaultfd, and ii) via batched updates. For i) userfaultfd is used to expand the competition interval, so that map_update_elem() can modify the contents of the map after map_freeze() and bpf_prog_load() were executed. This works, because userfaultfd halts the parallel thread which triggered a map_update_elem() at the time where we copy key/value from the user buffer and this already passed the FMODE_CAN_WRITE capability test given at that time the map was not "frozen". Then, the main thread performs the map_freeze() and bpf_prog_load(), and once that had completed successfully, the other thread is woken up to complete the pending map_update_elem() which then changes the map content. For ii) the idea of the batched update is similar, meaning, when there are a large number of updates to be processed, it can increase the competition interval between the two. It is therefore possible in practice to modify the contents of the map after executing map_freeze() and bpf_prog_load(). One way to fix both i) and ii) at the same time is to expand the use of the map's map->writecnt. 
The latter was introduced in fc9702273e2e ("bpf: Add mmap() support for BPF_MAP_TYPE_ARRAY") and further refined in 1f6cb19be2e2 ("bpf: Prevent re-mmap()'ing BPF map as writable for initially r/o mapping") with the rationale to make a writable mmap()'ing of a map mutually exclusive with read-only freezing. The counter indicates writable mmap() mappings and then prevents/fails the freeze operation. Its semantics can be expanded beyond just mmap() by generally indicating ongoing write phases. This would essentially span any parallel regular and batched flavor of update/delete operation and then also have map_freeze() fail with -EBUSY. For the check_mem_access() in the verifier we expand upon the bpf_map_is_rdonly() check ensuring that all last pending writes have completed via the bpf_map_write_active() test. Once the map->frozen is set and bpf_map_write_active() indicates a map->writecnt of 0 only then we are really guaranteed to use the map's data as known constants. For map->frozen being set and pending writes in process of still being completed we fall back to marking that register as unknown scalar so we don't end up making assumptions about it. With this, both TOCTOU reproducers from i) and ii) are fixed. Note that the map->writecnt has been converted into an atomic64 in the fix in order to avoid a double freeze_mutex mutex_{un,}lock() pair when updating map->writecnt in the various map update/delete BPF_* cmd flavors. Spanning the freeze_mutex over entire map update/delete operations on the syscall side would not be possible due to then causing everything to be serialized. Similarly, something like synchronize_rcu() after setting map->frozen to wait for update/deletes to complete is not possible either since it would also have to span the user copy which can sleep. On the libbpf side, this won't break d66562fba1ce ("libbpf: Add BPF object skeleton support") as the anonymous mmap()-ed "map initialization image" is remapped as a BPF map-backed mmap()-ed memory where for .rodata it's non-writable.
Fixes: a23740ec43ba ("bpf: Track contents of read-only maps as scalars") Reported-by: w1tcher.bupt@gmail.com Signed-off-by: Daniel Borkmann <daniel@iogearbox.net> Acked-by: Andrii Nakryiko <andrii@kernel.org> Signed-off-by: Alexei Starovoitov <ast@kernel.org> conflicts: kernel/bpf/syscall.c Signed-off-by: He Fengqing <hefengqing@huawei.com> Reviewed-by: Kuohai Xu <xukuohai@huawei.com> Reviewed-by: Xiu Jianfeng <xiujianfeng@huawei.com> Signed-off-by: Chen Jun <chenjun102@huawei.com> Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com> Signed-off-by: Yu Changchun <yuchangchun1@huawei.com> --- include/linux/bpf.h | 3 ++- kernel/bpf/syscall.c | 57 +++++++++++++++++++++++++++---------------- kernel/bpf/verifier.c | 17 ++++++++++++- 3 files changed, 54 insertions(+), 23 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 1f62a4eec283..474a0d852614 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -173,7 +173,7 @@ struct bpf_map { atomic64_t usercnt; struct work_struct work; struct mutex freeze_mutex; - u64 writecnt; /* writable mmap cnt; protected by freeze_mutex */ + atomic64_t writecnt; }; static inline bool map_value_has_spin_lock(const struct bpf_map *map) @@ -1252,6 +1252,7 @@ void bpf_map_charge_move(struct bpf_map_memory *dst, void *bpf_map_area_alloc(u64 size, int numa_node); void *bpf_map_area_mmapable_alloc(u64 size, int numa_node); void bpf_map_area_free(void *base); +bool bpf_map_write_active(const struct bpf_map *map); void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr); int generic_map_lookup_batch(struct bpf_map *map, const union bpf_attr *attr, diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 5b6da64da46d..bb9a9cb1f321 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -127,6 +127,21 @@ static struct bpf_map *find_and_alloc_map(union bpf_attr *attr) return map; } +static void bpf_map_write_active_inc(struct bpf_map *map) +{ + atomic64_inc(&map->writecnt); +} + +static void bpf_map_write_active_dec(struct bpf_map *map) +{ + atomic64_dec(&map->writecnt); +} + +bool bpf_map_write_active(const struct bpf_map *map) +{ + return atomic64_read(&map->writecnt) != 0; +} + static u32 bpf_map_value_size(struct bpf_map *map) { if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || @@ -588,11 +603,8 @@ static void bpf_map_mmap_open(struct vm_area_struct *vma) { struct bpf_map *map = vma->vm_file->private_data; - if (vma->vm_flags & VM_MAYWRITE) { - mutex_lock(&map->freeze_mutex); - map->writecnt++; - mutex_unlock(&map->freeze_mutex); - } + if (vma->vm_flags & VM_MAYWRITE) + bpf_map_write_active_inc(map); } /* called for all unmapped memory region (including initial) */ @@ -600,11 +612,8 @@ static void bpf_map_mmap_close(struct vm_area_struct *vma) { struct bpf_map *map = vma->vm_file->private_data; - if (vma->vm_flags & VM_MAYWRITE) { - mutex_lock(&map->freeze_mutex); - map->writecnt--; - mutex_unlock(&map->freeze_mutex); - } + if (vma->vm_flags & VM_MAYWRITE) + bpf_map_write_active_dec(map); } static const struct vm_operations_struct bpf_map_default_vmops = { @@ -654,7 +663,7 @@ static int bpf_map_mmap(struct file *filp, struct vm_area_struct *vma) goto out; if (vma->vm_flags & VM_MAYWRITE) - map->writecnt++; + bpf_map_write_active_inc(map); out: mutex_unlock(&map->freeze_mutex); return err; @@ -1086,6 +1095,7 @@ static int map_update_elem(union bpf_attr *attr) map = __bpf_map_get(f); if (IS_ERR(map)) return PTR_ERR(map); + bpf_map_write_active_inc(map); if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) { err = 
-EPERM; goto err_put; @@ -1127,6 +1137,7 @@ static int map_update_elem(union bpf_attr *attr) free_key: kfree(key); err_put: + bpf_map_write_active_dec(map); fdput(f); return err; } @@ -1149,6 +1160,7 @@ static int map_delete_elem(union bpf_attr *attr) map = __bpf_map_get(f); if (IS_ERR(map)) return PTR_ERR(map); + bpf_map_write_active_inc(map); if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) { err = -EPERM; goto err_put; @@ -1179,6 +1191,7 @@ static int map_delete_elem(union bpf_attr *attr) out: kfree(key); err_put: + bpf_map_write_active_dec(map); fdput(f); return err; } @@ -1483,6 +1496,7 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr) map = __bpf_map_get(f); if (IS_ERR(map)) return PTR_ERR(map); + bpf_map_write_active_inc(map); if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ) || !(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) { err = -EPERM; @@ -1524,6 +1538,7 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr) free_key: kfree(key); err_put: + bpf_map_write_active_dec(map); fdput(f); return err; } @@ -1550,8 +1565,7 @@ static int map_freeze(const union bpf_attr *attr) } mutex_lock(&map->freeze_mutex); - - if (map->writecnt) { + if (bpf_map_write_active(map)) { err = -EBUSY; goto err_put; } @@ -3976,6 +3990,9 @@ static int bpf_map_do_batch(const union bpf_attr *attr, union bpf_attr __user *uattr, int cmd) { + bool has_read = cmd == BPF_MAP_LOOKUP_BATCH || + cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH; + bool has_write = cmd != BPF_MAP_LOOKUP_BATCH; struct bpf_map *map; int err, ufd; struct fd f; @@ -3988,16 +4005,13 @@ static int bpf_map_do_batch(const union bpf_attr *attr, map = __bpf_map_get(f); if (IS_ERR(map)) return PTR_ERR(map); - - if ((cmd == BPF_MAP_LOOKUP_BATCH || - cmd == BPF_MAP_LOOKUP_AND_DELETE_BATCH) && - !(map_get_sys_perms(map, f) & FMODE_CAN_READ)) { + if (has_write) + bpf_map_write_active_inc(map); + if (has_read && !(map_get_sys_perms(map, f) & FMODE_CAN_READ)) { err = -EPERM; goto err_put; } - - if (cmd != BPF_MAP_LOOKUP_BATCH && - !(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) { + if (has_write && !(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) { err = -EPERM; goto err_put; } @@ -4010,8 +4024,9 @@ static int bpf_map_do_batch(const union bpf_attr *attr, BPF_DO_BATCH(map->ops->map_update_batch); else BPF_DO_BATCH(map->ops->map_delete_batch); - err_put: + if (has_write) + bpf_map_write_active_dec(map); fdput(f); return err; } diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 0c26757ea7fb..4215c2ff6aeb 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -3486,7 +3486,22 @@ static void coerce_reg_to_size(struct bpf_reg_state *reg, int size) static bool bpf_map_is_rdonly(const struct bpf_map *map) { - return (map->map_flags & BPF_F_RDONLY_PROG) && map->frozen; + /* A map is considered read-only if the following conditions are true: + * + * 1) BPF program side cannot change any of the map content. The + * BPF_F_RDONLY_PROG flag is throughout the lifetime of a map + * and was set at map creation time. + * 2) The map value(s) have been initialized from user space by a + * loader and then "frozen", such that no new map update/delete + * operations from syscall side are possible for the rest of + * the map's lifetime from that point onwards. + * 3) Any parallel/pending map update/delete operations from syscall + * side have been completed. Only after that point, it's safe to + * assume that map value(s) are immutable.
+ */ + return (map->map_flags & BPF_F_RDONLY_PROG) && + READ_ONCE(map->frozen) && + !bpf_map_write_active(map); } static int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val) -- 2.25.1
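The shape of the fix is easier to see outside the kernel. Below is a toy, single-file C model of the write-active counter (all names invented): write paths bracket the whole operation with inc/dec, freeze refuses with -EBUSY while any write is pending, and the verifier-side read-only test additionally requires that no write is in flight. The real code takes the increment right after __bpf_map_get() and serializes freeze under map->freeze_mutex; both details are elided here.

#include <errno.h>
#include <stdatomic.h>
#include <stdbool.h>

/* Toy stand-in for struct bpf_map: just the fields the fix touches. */
struct toy_map {
        atomic_long writecnt;   /* ongoing write phases, as in the patch */
        bool frozen;
        unsigned int map_flags;
};
#define TOY_F_RDONLY_PROG 0x1u

static bool toy_write_active(const struct toy_map *m)
{
        return atomic_load(&m->writecnt) != 0;
}

/* Write path (update/delete, incl. batched flavors): bracket the whole
 * operation so a concurrent freeze sees it as still pending. */
static int toy_update(struct toy_map *m)
{
        atomic_fetch_add(&m->writecnt, 1);
        /* ... copy key/value from user and update the map ... */
        atomic_fetch_sub(&m->writecnt, 1);
        return 0;
}

/* map_freeze(): in the kernel this runs under map->freeze_mutex. */
static int toy_freeze(struct toy_map *m)
{
        if (toy_write_active(m))
                return -EBUSY;  /* writes still in flight */
        m->frozen = true;
        return 0;
}

/* Verifier-side test from the hunk above: map values may only be used
 * as known constants once the map is program-read-only, frozen, and
 * quiescent. */
static bool toy_is_rdonly(const struct toy_map *m)
{
        return (m->map_flags & TOY_F_RDONLY_PROG) && m->frozen &&
               !toy_write_active(m);
}

int main(void)
{
        struct toy_map m = { .map_flags = TOY_F_RDONLY_PROG };

        toy_update(&m); /* completes: writecnt back to 0 */
        return toy_freeze(&m) == 0 && toy_is_rdonly(&m) ? 0 : 1;
}

The design point the model captures is why an atomic counter is used rather than widening freeze_mutex: holding the mutex across a whole update would serialize all map writes and would have to span a sleeping user copy, whereas the counter only has to make in-flight writes visible to the freeze check.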

From: Nadav Amit <namit@vmware.com> stable inclusion from stable-5.10.82 commit 40bc831ab5f630431010d1ff867390b07418a7ee category: bugfix issue: #I4NRS5 CVE: CVE-2021-4002 Reference: https://git.kernel.org/pub/scm/linux/kernel/git/stable/linux.git/commit/?id=... ----------------------------------------------- commit a4a118f2eead1d6c49e00765de89878288d4b890 upstream. When __unmap_hugepage_range() calls to huge_pmd_unshare() succeed, a TLB flush is missing. This TLB flush must be performed before releasing the i_mmap_rwsem, in order to prevent an unshared PMDs page from being released and reused before the TLB flush took place. Arguably, a comprehensive solution would use the mmu_gather interface to batch the TLB flushes and the PMDs page release, however it is not an easy solution: (1) try_to_unmap_one() and try_to_migrate_one() also call huge_pmd_unshare() and they cannot use the mmu_gather interface; and (2) deferring the release of the page reference for the PMDs page until after i_mmap_rwsem is dropped can confuse huge_pmd_unshare() into thinking PMDs are shared when they are not. Fix __unmap_hugepage_range() by adding the missing TLB flush, and forcing a flush when unshare is successful. Fixes: 24669e58477e ("hugetlb: use mmu_gather instead of a temporary linked list for accumulating pages") # 3.6 Signed-off-by: Nadav Amit <namit@vmware.com> Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com> Cc: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> Cc: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> Cc: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> Signed-off-by: Liu Shixin <liushixin2@huawei.com> Reviewed-by: Kefeng Wang <wangkefeng.wang@huawei.com> Signed-off-by: Chen Jun <chenjun102@huawei.com> Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com> Signed-off-by: Yu Changchun <yuchangchun1@huawei.com> --- mm/hugetlb.c | 23 +++++++++++++++++++---- 1 file changed, 19 insertions(+), 4 deletions(-) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 43763d58a77a..17679e80fbc4 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3914,6 +3914,7 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, struct hstate *h = hstate_vma(vma); unsigned long sz = huge_page_size(h); struct mmu_notifier_range range; + bool force_flush = false; WARN_ON(!is_vm_hugetlb_page(vma)); BUG_ON(start & ~huge_page_mask(h)); @@ -3942,10 +3943,8 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, ptl = huge_pte_lock(h, mm, ptep); if (huge_pmd_unshare(mm, vma, &address, ptep)) { spin_unlock(ptl); - /* - * We just unmapped a page of PMDs by clearing a PUD. - * The caller's TLB flush range should cover this area. - */ + tlb_flush_pmd_range(tlb, address & PUD_MASK, PUD_SIZE); + force_flush = true; continue; } @@ -4002,6 +4001,22 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, } mmu_notifier_invalidate_range_end(&range); tlb_end_vma(tlb, vma); + + /* + * If we unshared PMDs, the TLB flush was not recorded in mmu_gather. We + * could defer the flush until now, since by holding i_mmap_rwsem we + * guaranteed that the last reference would not be dropped. But we must + * do the flushing before we return, as otherwise i_mmap_rwsem will be + * dropped and the last reference to the shared PMDs page might be + * dropped as well.
+ * + * In theory we could defer the freeing of the PMD pages as well, but + * huge_pmd_unshare() relies on the exact page_count for the PMD page to + * detect sharing, so we cannot defer the release of the page either. + * Instead, do flush now. + */ + if (force_flush) + tlb_flush_mmu_tlbonly(tlb); } void __unmap_hugepage_range_final(struct mmu_gather *tlb, -- 2.25.1
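As a standalone illustration of the ordering this fix enforces, here is a toy C model (invented names; the PUD_SIZE/PUD_MASK values assume x86-64 with 4K pages): once huge_pmd_unshare() succeeds, the whole PUD-covered range is fed into the deferred-flush window, and a TLB-only flush is forced before the unmap path returns, i.e. while i_mmap_rwsem would still be held.

#include <stdbool.h>
#include <stdio.h>

/* Toy model of the ordering above, not the kernel's mmu_gather. */
#define PUD_SIZE (1UL << 30)
#define PUD_MASK (~(PUD_SIZE - 1))

struct toy_tlb { unsigned long start, end; bool pending; };

/* Widen the deferred-flush window, as tlb_flush_pmd_range() does:
 * unsharing clears a PUD, so a PUD_SIZE range goes stale at once. */
static void toy_flush_pmd_range(struct toy_tlb *tlb,
                                unsigned long addr, unsigned long size)
{
        if (!tlb->pending || addr < tlb->start)
                tlb->start = addr;
        if (!tlb->pending || addr + size > tlb->end)
                tlb->end = addr + size;
        tlb->pending = true;
}

/* Stand-in for tlb_flush_mmu_tlbonly(): flush translations only, no
 * page freeing is modeled here. */
static void toy_flush_mmu_tlbonly(struct toy_tlb *tlb)
{
        if (!tlb->pending)
                return;
        printf("flush TLB [%#lx, %#lx)\n", tlb->start, tlb->end);
        tlb->pending = false;
}

int main(void)
{
        struct toy_tlb tlb = { 0 };
        unsigned long addr = 0x7f0040200000UL; /* arbitrary example VA */
        bool force_flush = false;

        /* huge_pmd_unshare() succeeded for this address: */
        toy_flush_pmd_range(&tlb, addr & PUD_MASK, PUD_SIZE);
        force_flush = true;

        /* The point of the fix: flush before i_mmap_rwsem is dropped,
         * so the unshared PMD page cannot be freed and reused while
         * stale TLB entries still reference it. */
        if (force_flush)
                toy_flush_mmu_tlbonly(&tlb);
        return 0;
}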